From 0dd67c222114bc34d9d6f8f31eb9dc2386d22177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Sun, 9 Jul 2023 07:44:34 +0900 Subject: [PATCH] Add multi indices search to elasticsearch + quickwit search endpoints. --- quickwit/quickwit-cli/tests/cli.rs | 4 +- quickwit/quickwit-config/src/lib.rs | 46 + .../quickwit-control-plane/src/scheduler.rs | 17 +- .../src/garbage_collection.rs | 4 +- .../quickwit-index-management/src/index.rs | 6 +- .../src/actors/indexing_pipeline.rs | 2 +- .../src/actors/indexing_service.rs | 10 +- .../src/actors/merge_executor.rs | 2 +- .../src/actors/merge_pipeline.rs | 4 +- quickwit/quickwit-indexing/src/lib.rs | 2 +- quickwit/quickwit-indexing/src/test_utils.rs | 37 +- quickwit/quickwit-jaeger/src/lib.rs | 4 +- .../src/actors/delete_task_pipeline.rs | 18 +- .../src/actors/delete_task_planner.rs | 39 +- .../src/actors/delete_task_service.rs | 4 +- .../src/actors/garbage_collector.rs | 32 +- .../src/actors/retention_policy_executor.rs | 22 +- quickwit/quickwit-metastore/src/error.rs | 6 +- quickwit/quickwit-metastore/src/lib.rs | 4 +- .../file_backed_index/mod.rs | 2 +- .../metastore/file_backed_metastore/mod.rs | 216 +++- .../file_backed_metastore/store_operations.rs | 8 +- .../metastore/grpc_metastore/grpc_adapter.rs | 8 +- .../src/metastore/grpc_metastore/mod.rs | 17 +- .../src/metastore/instrumented_metastore.rs | 21 +- .../metastore/metastore_event_publisher.rs | 12 +- .../quickwit-metastore/src/metastore/mod.rs | 48 +- .../src/metastore/postgresql_metastore.rs | 262 +++- .../src/metastore/retrying_metastore/mod.rs | 12 +- .../src/metastore/retrying_metastore/test.rs | 33 +- quickwit/quickwit-metastore/src/tests.rs | 140 ++- .../protos/quickwit/metastore.proto | 4 +- .../protos/quickwit/search.proto | 9 +- .../codegen/quickwit/quickwit.metastore.rs | 5 +- .../src/codegen/quickwit/quickwit.search.rs | 9 +- quickwit/quickwit-proto/src/lib.rs | 4 +- quickwit/quickwit-query/src/query_ast/mod.rs | 12 +- 
.../quickwit-search/src/cluster_client.rs | 9 +- quickwit/quickwit-search/src/error.rs | 14 +- quickwit/quickwit-search/src/leaf_cache.rs | 12 +- quickwit/quickwit-search/src/lib.rs | 31 +- quickwit/quickwit-search/src/retry/search.rs | 6 +- quickwit/quickwit-search/src/root.rs | 1120 ++++++++++++----- .../quickwit-search/src/scroll_context.rs | 13 +- .../quickwit-search/src/search_stream/leaf.rs | 10 +- .../quickwit-search/src/search_stream/root.rs | 98 +- quickwit/quickwit-search/src/service.rs | 6 +- quickwit/quickwit-search/src/tests.rs | 129 +- .../src/elastic_search_api/filter.rs | 20 +- .../src/elastic_search_api/mod.rs | 41 +- .../src/elastic_search_api/rest_handler.rs | 35 +- .../src/index_api/rest_handler.rs | 92 +- quickwit/quickwit-serve/src/lib.rs | 22 +- quickwit/quickwit-serve/src/rest.rs | 6 + quickwit/quickwit-serve/src/search_api/mod.rs | 29 +- .../src/search_api/rest_handler.rs | 165 ++- quickwit/rest-api-tests/README.md | 18 +- quickwit/rest-api-tests/run_tests.py | 5 +- .../0001-muti_indices_query.yaml | 19 + .../es_compatibility/multi-indices/_ctx.yaml | 2 + .../multi-indices/_setup.elasticsearch.yaml | 107 ++ .../multi-indices/_setup.quickwit.yaml | 80 ++ .../_teardown.elasticsearch.yaml | 5 + .../multi-indices/_teardown.quickwit.yaml | 7 + 64 files changed, 2275 insertions(+), 911 deletions(-) create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/0001-muti_indices_query.yaml create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_ctx.yaml create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.elasticsearch.yaml create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.quickwit.yaml create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.elasticsearch.yaml create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.quickwit.yaml diff --git 
a/quickwit/quickwit-cli/tests/cli.rs b/quickwit/quickwit-cli/tests/cli.rs index 8915540f846..7cd01ee7b06 100644 --- a/quickwit/quickwit-cli/tests/cli.rs +++ b/quickwit/quickwit-cli/tests/cli.rs @@ -187,8 +187,8 @@ async fn test_cmd_ingest_on_non_existing_index() { assert_eq!( error.root_cause().downcast_ref::().unwrap(), - &MetastoreError::IndexDoesNotExist { - index_id: "index-does-not-exist".to_string() + &MetastoreError::IndexesDoNotExist { + index_ids: vec!["index-does-not-exist".to_string()] } ); } diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs index a116d296531..0537689f232 100644 --- a/quickwit/quickwit-config/src/lib.rs +++ b/quickwit/quickwit-config/src/lib.rs @@ -119,6 +119,38 @@ pub fn validate_identifier(label: &str, value: &str) -> anyhow::Result<()> { ); } +/// Checks whether an index ID pattern conforms to Elasticsearch/Quickwit object naming conventions. +/// Index id patterns accepts the same characters as identifiers, but also accepts `*` as a +/// wildcard. +pub fn validate_index_id_pattern(value: &str) -> anyhow::Result<()> { + static IDENTIFIER_REGEX_WITH_GLOB_PATTERN: OnceCell = OnceCell::new(); + + if !IDENTIFIER_REGEX_WITH_GLOB_PATTERN + .get_or_init(|| Regex::new(r"^[a-zA-Z\*][a-zA-Z0-9-_\.\*]{0,254}$").expect("Failed to compile regular expression. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues.")) + .is_match(value) + { + bail!( + "Index ID pattern `{value}` is invalid. Patterns must match the following regular \ + expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{{0,254}}$`." + ); + } + + // Forbid multiple stars in the pattern to force the user making simpler patterns + // as multiple stars does not bring any value. + if value.contains("**") { + bail!("Index ID pattern `{value}` is invalid. Patterns must not contain `**`."); + } + + // If there is no star in the pattern, we need at least 3 characters. 
+ if !value.contains('*') && value.len() < 3 { + bail!( + "Index ID pattern `{value}` is invalid. An index ID must have at least 3 characters." + ); + } + + Ok(()) +} + pub fn validate_node_id(node_id: &str) -> anyhow::Result<()> { if !is_valid_hostname(node_id) { bail!( @@ -216,6 +248,7 @@ pub trait TestableForRegression: Serialize + DeserializeOwned { #[cfg(test)] mod tests { use super::validate_identifier; + use crate::validate_index_id_pattern; #[test] fn test_validate_identifier() { @@ -236,4 +269,17 @@ mod tests { .to_string() .contains("Cluster ID identifier `foo!` is invalid.")); } + + #[test] + fn test_validate_index_id_pattern() { + validate_index_id_pattern("*").unwrap(); + validate_index_id_pattern("abc.*").unwrap(); + validate_index_id_pattern("ab").unwrap_err(); + validate_index_id_pattern("").unwrap_err(); + validate_index_id_pattern("**").unwrap_err(); + assert!(validate_index_id_pattern("foo!") + .unwrap_err() + .to_string() + .contains("Index ID pattern `foo!` is invalid.")); + } } diff --git a/quickwit/quickwit-control-plane/src/scheduler.rs b/quickwit/quickwit-control-plane/src/scheduler.rs index d9adf1ee89e..ae64593410e 100644 --- a/quickwit/quickwit-control-plane/src/scheduler.rs +++ b/quickwit/quickwit-control-plane/src/scheduler.rs @@ -28,7 +28,7 @@ use async_trait::async_trait; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; use quickwit_config::SourceConfig; -use quickwit_metastore::Metastore; +use quickwit_metastore::{ListIndexesQuery, Metastore}; use quickwit_proto::control_plane::{NotifyIndexChangeRequest, NotifyIndexChangeResponse}; use quickwit_proto::indexing::{ApplyIndexingPlanRequest, IndexingService, IndexingTask}; use serde::Serialize; @@ -191,7 +191,10 @@ impl IndexingScheduler { } async fn fetch_source_configs(&self) -> anyhow::Result> { - let indexes_metadatas = self.metastore.list_indexes_metadatas().await?; + let indexes_metadatas = self + .metastore + 
.list_indexes_metadatas(ListIndexesQuery::All) + .await?; let source_configs: HashMap = indexes_metadatas .into_iter() .flat_map(|index_metadata| { @@ -530,7 +533,7 @@ mod tests { use quickwit_config::service::QuickwitService; use quickwit_config::{KafkaSourceParams, SourceConfig, SourceInputFormat, SourceParams}; use quickwit_indexing::IndexingService; - use quickwit_metastore::{IndexMetadata, MockMetastore}; + use quickwit_metastore::{IndexMetadata, ListIndexesQuery, MockMetastore}; use quickwit_proto::indexing::{ApplyIndexingPlanRequest, IndexingServiceClient, IndexingTask}; use serde_json::json; @@ -613,9 +616,11 @@ mod tests { let mut index_metadata_2 = index_metadata_for_test(index_2, source_2, 1, 1); index_metadata_2.create_timestamp = index_metadata_1.create_timestamp + 1; let mut metastore = MockMetastore::default(); - metastore - .expect_list_indexes_metadatas() - .returning(move || Ok(vec![index_metadata_2.clone(), index_metadata_1.clone()])); + metastore.expect_list_indexes_metadatas().returning( + move |_list_indexes_query: ListIndexesQuery| { + Ok(vec![index_metadata_2.clone(), index_metadata_1.clone()]) + }, + ); let mut indexer_inboxes = Vec::new(); let indexing_client_pool = Pool::default(); let change_stream = cluster.ready_nodes_change_stream().await; diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs b/quickwit/quickwit-index-management/src/garbage_collection.rs index d82af7905fa..57399fa73b9 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -652,8 +652,8 @@ mod tests { let mut mock_metastore = MockMetastore::new(); mock_metastore.expect_delete_splits().return_once(|_, _| { - Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }) }); let metastore = Arc::new(mock_metastore); diff --git 
a/quickwit/quickwit-index-management/src/index.rs b/quickwit/quickwit-index-management/src/index.rs index 4aaf81ed759..7c54ccc0632 100644 --- a/quickwit/quickwit-index-management/src/index.rs +++ b/quickwit/quickwit-index-management/src/index.rs @@ -102,8 +102,8 @@ impl IndexService { if overwrite { match self.delete_index(&index_config.index_id, false).await { Ok(_) - | Err(IndexServiceError::MetastoreError(MetastoreError::IndexDoesNotExist { - index_id: _, + | Err(IndexServiceError::MetastoreError(MetastoreError::IndexesDoNotExist { + index_ids: _, })) => { // Ignore IndexDoesNotExist error. } @@ -442,7 +442,7 @@ mod tests { .await .unwrap_err(); assert!( - matches!(error, MetastoreError::IndexDoesNotExist { index_id } if index_id == index_uid.index_id()) + matches!(error, MetastoreError::IndexesDoNotExist { index_ids } if index_ids == vec![index_uid.index_id().to_string()]) ); assert!(!storage.exists(split_path).await.unwrap()); } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 8010a3b0cb9..c917c6eba1a 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -502,7 +502,7 @@ impl Handler for IndexingPipeline { } self.previous_generations_statistics.num_spawn_attempts = 1 + spawn.retry_count; if let Err(spawn_error) = self.spawn_pipeline(ctx).await { - if let Some(MetastoreError::IndexDoesNotExist { .. }) = + if let Some(MetastoreError::IndexesDoNotExist { .. 
}) = spawn_error.downcast_ref::() { info!(error = ?spawn_error, "Could not spawn pipeline, index might have been deleted."); diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 34d12e1c248..a6d66631009 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -37,7 +37,7 @@ use quickwit_config::{ build_doc_mapper, IndexConfig, IndexerConfig, SourceConfig, INGEST_API_SOURCE_ID, }; use quickwit_ingest::{DropQueueRequest, IngestApiService, ListQueuesRequest, QUEUES_DIR_NAME}; -use quickwit_metastore::{IndexMetadata, Metastore}; +use quickwit_metastore::{IndexMetadata, ListIndexesQuery, Metastore}; use quickwit_proto::indexing::{ ApplyIndexingPlanRequest, ApplyIndexingPlanResponse, IndexingError, IndexingPipelineId, IndexingTask, @@ -602,7 +602,7 @@ impl IndexingService { let index_ids: HashSet = self .metastore - .list_indexes_metadatas() + .list_indexes_metadatas(ListIndexesQuery::All) .await .context("Failed to list queues")? 
.into_iter() @@ -1293,9 +1293,9 @@ mod tests { .insert(source_config.source_id.clone(), source_config.clone()); let mut metastore = MockMetastore::default(); let index_metadata_clone = index_metadata.clone(); - metastore - .expect_list_indexes_metadatas() - .returning(move || Ok(vec![index_metadata_clone.clone()])); + metastore.expect_list_indexes_metadatas().returning( + move |_list_indexes_query: ListIndexesQuery| Ok(vec![index_metadata_clone.clone()]), + ); metastore .expect_index_metadata() .returning(move |_| Ok(index_metadata.clone())); diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index 754f00e0a25..699010fdfac 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -692,7 +692,7 @@ mod tests { index_uid: index_uid.to_string(), start_timestamp: None, end_timestamp: None, - query_ast: quickwit_query::query_ast::qast_helper(delete_query, &["body"]), + query_ast: quickwit_query::query_ast::qast_string_helper(delete_query, &["body"]), }) .await?; let split_metadata = metastore diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index 2b25cc2a881..a33b7e5214e 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -408,7 +408,7 @@ impl Handler for MergePipeline { } self.previous_generations_statistics.num_spawn_attempts = 1 + spawn.retry_count; if let Err(spawn_error) = self.spawn_pipeline(ctx).await { - if let Some(MetastoreError::IndexDoesNotExist { .. }) = + if let Some(MetastoreError::IndexesDoNotExist { .. 
}) = spawn_error.downcast_ref::() { info!(error = ?spawn_error, "Could not spawn pipeline, index might have been deleted."); @@ -471,7 +471,7 @@ mod tests { .expect_list_splits() .times(1) .returning(move |list_split_query| { - assert_eq!(list_split_query.index_uid, index_uid); + assert_eq!(list_split_query.index_uids, vec![index_uid.clone()]); assert_eq!( list_split_query.split_states, vec![quickwit_metastore::SplitState::Published] diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index d4cca6fb97d..f4422c3708c 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -48,7 +48,7 @@ mod split_store; mod test_utils; #[cfg(any(test, feature = "testsuite"))] -pub use test_utils::{mock_split, mock_split_meta, TestSandbox}; +pub use test_utils::{mock_split, mock_split_meta, MockSplitBuilder, TestSandbox}; use self::merge_policy::MergePolicy; pub use self::source::check_source_connectivity; diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 3715df6929c..e7f293e9c53 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -218,19 +218,42 @@ impl TestSandbox { } } +/// Mock split builder. +pub struct MockSplitBuilder { + split_metadata: SplitMetadata, +} + +impl MockSplitBuilder { + pub fn new(split_id: &str) -> Self { + Self { + split_metadata: mock_split_meta(split_id, &IndexUid::new("test-index")), + } + } + + pub fn with_index_uid(mut self, index_uid: &IndexUid) -> Self { + self.split_metadata.index_uid = index_uid.clone(); + self + } + + pub fn build(self) -> Split { + Split { + split_state: SplitState::Published, + split_metadata: self.split_metadata, + update_timestamp: 0, + publish_timestamp: None, + } + } +} + /// Mock split helper. 
pub fn mock_split(split_id: &str) -> Split { - Split { - split_state: SplitState::Published, - split_metadata: mock_split_meta(split_id), - update_timestamp: 0, - publish_timestamp: None, - } + MockSplitBuilder::new(split_id).build() } /// Mock split meta helper. -pub fn mock_split_meta(split_id: &str) -> SplitMetadata { +pub fn mock_split_meta(split_id: &str, index_uid: &IndexUid) -> SplitMetadata { SplitMetadata { + index_uid: index_uid.clone(), split_id: split_id.to_string(), partition_id: 13u64, num_docs: 10, diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index d1e0b8ef54a..96e9f102fdf 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -255,7 +255,7 @@ impl JaegerService { let aggregation_query = build_aggregations_query(trace_query.num_traces as usize); let max_hits = 0; let search_request = SearchRequest { - index_id, + index_id_patterns: vec![index_id], query_ast, aggregation_request: Some(aggregation_query), max_hits, @@ -303,7 +303,7 @@ impl JaegerService { serde_json::to_string(&query_ast).map_err(|err| Status::internal(err.to_string()))?; let search_request = SearchRequest { - index_id: OTEL_TRACES_INDEX_ID.to_string(), + index_id_patterns: vec![OTEL_TRACES_INDEX_ID.to_string()], query_ast, start_timestamp: Some(*search_window.start()), end_timestamp: Some(*search_window.end()), diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs index 13c442e2017..3b9e2e681a4 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs @@ -356,7 +356,7 @@ mod tests { index_uid: index_uid.to_string(), start_timestamp: None, end_timestamp: None, - query_ast: quickwit_query::query_ast::qast_helper("body:delete", &[]), + query_ast: quickwit_query::query_ast::qast_string_helper("body:delete", &[]), }) .await .unwrap(); @@ -365,8 
+365,12 @@ mod tests { mock_search_service .expect_leaf_search() .withf(|leaf_request| -> bool { - leaf_request.search_request.as_ref().unwrap().index_id - == "test-delete-pipeline-simple" + leaf_request + .search_request + .as_ref() + .unwrap() + .index_id_patterns + == vec!["test-delete-pipeline-simple".to_string()] }) .times(2) .returning(move |_: LeafSearchRequest| { @@ -441,8 +445,12 @@ mod tests { mock_search_service .expect_leaf_search() .withf(|leaf_request| -> bool { - leaf_request.search_request.as_ref().unwrap().index_id - == "test-delete-pipeline-shut-down" + leaf_request + .search_request + .as_ref() + .unwrap() + .index_id_patterns + == vec!["test-delete-pipeline-shut-down".to_string()] }) .returning(move |_: LeafSearchRequest| { Ok(LeafSearchResponse { diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs index d775a61cb5e..40bf168e731 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_planner.rs @@ -17,7 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; @@ -34,7 +34,7 @@ use quickwit_metastore::{ }; use quickwit_proto::metastore::DeleteTask; use quickwit_proto::{IndexUid, SearchRequest}; -use quickwit_search::{jobs_to_leaf_request, SearchJob, SearchJobPlacer}; +use quickwit_search::{jobs_to_leaf_requests, IndexMetasForLeafSearch, SearchJob, SearchJobPlacer}; use serde::Serialize; use tantivy::Inventory; use tracing::{debug, info}; @@ -291,25 +291,35 @@ impl DeleteTaskPlanner { .delete_query .as_ref() .expect("Delete task must have a delete query."); + // TODO: resolve with the default fields. 
let search_request = SearchRequest { - index_id: IndexUid::from(delete_query.index_uid.clone()) + index_id_patterns: vec![IndexUid::from(delete_query.index_uid.clone()) .index_id() - .to_string(), + .to_string()], query_ast: delete_query.query_ast.clone(), start_timestamp: delete_query.start_timestamp, end_timestamp: delete_query.end_timestamp, ..Default::default() }; - let leaf_search_request = jobs_to_leaf_request( + let mut search_indexes_metas = HashMap::new(); + search_indexes_metas.insert( + IndexUid::from(delete_query.index_uid.clone()), + IndexMetasForLeafSearch { + doc_mapper_str: doc_mapper_str.to_string(), + index_uri: Uri::from_well_formed(index_uri), + }, + ); + let leaf_search_request = jobs_to_leaf_requests( &search_request, - doc_mapper_str, - index_uri, + &search_indexes_metas, vec![search_job.clone()], - ); - let response = search_client.leaf_search(leaf_search_request).await?; - ctx.record_progress(); - if response.num_hits > 0 { - return Ok(true); + )?; + for leaf_request in leaf_search_request { + let response = search_client.leaf_search(leaf_request).await?; + ctx.record_progress(); + if response.num_hits > 0 { + return Ok(true); + } } } Ok(false) @@ -457,8 +467,9 @@ mod tests { // Creates 2 delete tasks, one that will match 1 document, // the other that will match no document. 
- let body_delete_ast = quickwit_query::query_ast::qast_helper("body:delete", &[]); - let match_nothing_ast = quickwit_query::query_ast::qast_helper("body:matchnothing", &[]); + let body_delete_ast = quickwit_query::query_ast::qast_string_helper("body:delete", &[]); + let match_nothing_ast = + quickwit_query::query_ast::qast_string_helper("body:matchnothing", &[]); metastore .create_delete_task(DeleteQuery { index_uid: index_uid.to_string(), diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_service.rs b/quickwit/quickwit-janitor/src/actors/delete_task_service.rs index 8102e5256a6..3dd5b8a34af 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_service.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_service.rs @@ -26,7 +26,7 @@ use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, ActorHandle, Handler}; use quickwit_common::temp_dir::{self}; use quickwit_config::IndexConfig; -use quickwit_metastore::Metastore; +use quickwit_metastore::{ListIndexesQuery, Metastore}; use quickwit_proto::IndexUid; use quickwit_search::SearchJobPlacer; use quickwit_storage::StorageResolver; @@ -108,7 +108,7 @@ impl DeleteTaskService { ) -> anyhow::Result<()> { let mut index_config_by_index_id: HashMap = self .metastore - .list_indexes_metadatas() + .list_indexes_metadatas(ListIndexesQuery::All) .await? 
.into_iter() .map(|index_metadata| { diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index b3411e28033..9a115f34c92 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -29,7 +29,7 @@ use quickwit_actors::{Actor, ActorContext, Handler}; // use quickwit_index_management::run_garbage_collect; use quickwit_common::shared_consts::DELETION_GRACE_PERIOD; use quickwit_index_management::run_garbage_collect; -use quickwit_metastore::Metastore; +use quickwit_metastore::{ListIndexesQuery, Metastore}; use quickwit_storage::StorageResolver; use serde::Serialize; use tracing::{error, info}; @@ -86,7 +86,11 @@ impl GarbageCollector { info!("garbage-collect-operation"); self.counters.num_passes += 1; - let indexes = match self.metastore.list_indexes_metadatas().await { + let indexes = match self + .metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + { Ok(metadatas) => metadatas, Err(error) => { error!(error=?error, "Failed to list indexes from the metastore."); @@ -254,7 +258,7 @@ mod tests { .times(2) .returning(|query: ListSplitsQuery| { assert_eq!( - query.index_uid.to_string(), + query.index_uids[0].to_string(), "test-index:11111111111111111111111111" ); let splits = match query.split_states[0] { @@ -327,7 +331,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(1) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Ok(vec![IndexMetadata::for_test( "test-index", "ram://indexes/test-index", @@ -337,7 +341,7 @@ mod tests { .expect_list_splits() .times(2) .returning(|query| { - assert_eq!(query.index_uid.index_id(), "test-index"); + assert_eq!(query.index_uids[0].index_id(), "test-index"); let splits = match query.split_states[0] { SplitState::Staged => make_splits(&["a"], SplitState::Staged), SplitState::MarkedForDeletion => { @@ -388,7 +392,7 @@ mod 
tests { mock_metastore .expect_list_indexes_metadatas() .times(3) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Ok(vec![IndexMetadata::for_test( "test-index", "ram://indexes/test-index", @@ -398,7 +402,7 @@ mod tests { .expect_list_splits() .times(6) .returning(|query| { - assert_eq!(query.index_uid.index_id(), "test-index"); + assert_eq!(query.index_uids[0].index_id(), "test-index"); let splits = match query.split_states[0] { SplitState::Staged => make_splits(&["a"], SplitState::Staged), SplitState::MarkedForDeletion => { @@ -474,7 +478,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(4) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Err(MetastoreError::DbError { message: "Fail to list indexes.".to_string(), }) @@ -505,7 +509,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(1) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Ok(vec![IndexMetadata::for_test( "test-index", "postgresql://indexes/test-index", @@ -535,7 +539,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(1) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Ok(vec![ IndexMetadata::for_test("test-index-1", "ram:///indexes/test-index-1"), IndexMetadata::for_test("test-index-2", "ram:///indexes/test-index-2"), @@ -545,9 +549,9 @@ mod tests { .expect_list_splits() .times(3) .returning(|query| { - assert!(["test-index-1", "test-index-2"].contains(&query.index_uid.index_id())); + assert!(["test-index-1", "test-index-2"].contains(&query.index_uids[0].index_id())); - if query.index_uid.index_id() == "test-index-2" { + if query.index_uids[0].index_id() == "test-index-2" { return Err(MetastoreError::DbError { message: "fail to delete".to_string(), }); @@ -604,7 +608,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(1) - .returning(|| { + .returning(move |_list_indexes_query: ListIndexesQuery| { Ok(vec![ 
IndexMetadata::for_test("test-index-1", "ram://indexes/test-index-1"), IndexMetadata::for_test("test-index-2", "ram://indexes/test-index-2"), @@ -614,7 +618,7 @@ mod tests { .expect_list_splits() .times(4) .returning(|query| { - assert!(["test-index-1", "test-index-2"].contains(&query.index_uid.index_id())); + assert!(["test-index-1", "test-index-2"].contains(&query.index_uids[0].index_id())); let splits = match query.split_states[0] { SplitState::Staged => make_splits(&["a"], SplitState::Staged), SplitState::MarkedForDeletion => { diff --git a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs index e25638bec84..80279b3f1a4 100644 --- a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs +++ b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs @@ -25,7 +25,7 @@ use async_trait::async_trait; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, Handler}; use quickwit_config::IndexConfig; -use quickwit_metastore::Metastore; +use quickwit_metastore::{ListIndexesQuery, Metastore}; use quickwit_proto::IndexUid; use serde::Serialize; use tracing::{debug, error, info}; @@ -81,7 +81,11 @@ impl RetentionPolicyExecutor { debug!("retention-policy-refresh-indexes-operation"); self.counters.num_refresh_passes += 1; - let index_metadatas = match self.metastore.list_indexes_metadatas().await { + let index_metadatas = match self + .metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + { Ok(metadatas) => metadatas, Err(error) => { error!(error=?error, "Failed to list indexes from the metastore."); @@ -248,7 +252,9 @@ mod tests { use mockall::Sequence; use quickwit_actors::Universe; use quickwit_config::RetentionPolicy; - use quickwit_metastore::{IndexMetadata, MockMetastore, Split, SplitMetadata, SplitState}; + use quickwit_metastore::{ + IndexMetadata, ListIndexesQuery, MockMetastore, Split, SplitMetadata, SplitState, + }; use super::*; @@ 
-341,7 +347,7 @@ mod tests { .expect_list_indexes_metadatas() .times(1) .in_sequence(&mut sequence) - .returning(|| { + .returning(|_list_indexes_query: ListIndexesQuery| { Ok(make_indexes(&[ ("a", Some("1 hour")), ("b", Some("1 hour")), @@ -353,7 +359,7 @@ mod tests { .expect_list_indexes_metadatas() .times(1) .in_sequence(&mut sequence) - .returning(|| { + .returning(|_list_indexes_query: ListIndexesQuery| { Ok(make_indexes(&[ ("a", Some("1 hour")), ("b", Some("2 hour")), @@ -365,7 +371,7 @@ mod tests { .expect_list_indexes_metadatas() .times(1) .in_sequence(&mut sequence) - .returning(|| { + .returning(|_list_indexes_query: ListIndexesQuery| { Ok(make_indexes(&[ ("b", Some("1 hour")), ("d", Some("1 hour")), @@ -417,7 +423,7 @@ mod tests { mock_metastore .expect_list_indexes_metadatas() .times(..) - .returning(|| { + .returning(|_list_indexes_query: ListIndexesQuery| { Ok(make_indexes(&[ ("a", Some("2 hour")), ("b", Some("1 hour")), @@ -430,7 +436,7 @@ mod tests { .times(2..=4) .returning(|query| { assert_eq!(query.split_states, &[SplitState::Published]); - let splits = match query.index_uid.index_id() { + let splits = match query.index_uids[0].index_id() { "a" => { vec![ make_split("split-1", Some(1000..=5000)), diff --git a/quickwit/quickwit-metastore/src/error.rs b/quickwit/quickwit-metastore/src/error.rs index 4457316dd08..2de40b743fb 100644 --- a/quickwit/quickwit-metastore/src/error.rs +++ b/quickwit/quickwit-metastore/src/error.rs @@ -36,8 +36,8 @@ pub enum MetastoreError { #[error("Access forbidden: `{message}`.")] Forbidden { message: String }, - #[error("Index `{index_id}` does not exist.")] - IndexDoesNotExist { index_id: String }, + #[error("Indexes `{index_ids:?}` do not exist.")] + IndexesDoNotExist { index_ids: Vec }, /// Any generic internal error. /// The message can be helpful to users, but the detail of the error @@ -113,7 +113,7 @@ impl ServiceError for MetastoreError { Self::Forbidden { .. 
} => ServiceErrorCode::MethodNotAllowed, Self::IncompatibleCheckpointDelta(_) => ServiceErrorCode::BadRequest, Self::IndexAlreadyExists { .. } => ServiceErrorCode::BadRequest, - Self::IndexDoesNotExist { .. } => ServiceErrorCode::NotFound, + Self::IndexesDoNotExist { .. } => ServiceErrorCode::NotFound, Self::InternalError { .. } => ServiceErrorCode::Internal, Self::InvalidManifest { .. } => ServiceErrorCode::Internal, Self::Io { .. } => ServiceErrorCode::Internal, diff --git a/quickwit/quickwit-metastore/src/lib.rs b/quickwit/quickwit-metastore/src/lib.rs index 788fea5df8f..f8c8fe0ef27 100644 --- a/quickwit/quickwit-metastore/src/lib.rs +++ b/quickwit/quickwit-metastore/src/lib.rs @@ -51,7 +51,9 @@ pub use metastore::postgresql_metastore::PostgresqlMetastore; pub use metastore::retrying_metastore::RetryingMetastore; #[cfg(any(test, feature = "testsuite"))] pub use metastore::MockMetastore; -pub use metastore::{file_backed_metastore, IndexMetadata, ListSplitsQuery, Metastore}; +pub use metastore::{ + file_backed_metastore, IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, +}; pub use metastore_factory::{MetastoreFactory, UnsupportedMetastore}; pub use metastore_resolver::MetastoreResolver; use quickwit_common::is_disjoint; diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/file_backed_index/mod.rs index a91308367f7..3f8b7944f86 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/file_backed_index/mod.rs @@ -88,7 +88,7 @@ impl quickwit_config::TestableForRegression for FileBackedIndex { index_uid: "index:11111111111111111111111111".to_string(), start_timestamp: None, end_timestamp: None, - query_ast: quickwit_query::query_ast::qast_helper("Harry Potter", &["body"]), + query_ast: 
quickwit_query::query_ast::qast_string_helper("Harry Potter", &["body"]), }), }; FileBackedIndex::new(index_metadata, splits, vec![delete_task]) diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/mod.rs index fd8e60730ea..ecb95de1b0b 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/mod.rs @@ -33,10 +33,11 @@ use std::time::Duration; use async_trait::async_trait; use futures::future::try_join_all; use quickwit_common::uri::Uri; -use quickwit_config::{IndexConfig, SourceConfig}; +use quickwit_config::{validate_index_id_pattern, IndexConfig, SourceConfig}; use quickwit_proto::metastore::{DeleteQuery, DeleteTask}; use quickwit_proto::IndexUid; use quickwit_storage::Storage; +use regex::Regex; use tokio::sync::{Mutex, OwnedMutexGuard, RwLock}; use self::file_backed_index::FileBackedIndex; @@ -46,6 +47,7 @@ use self::store_operations::{ check_indexes_states_exist, delete_index, fetch_index, fetch_or_init_indexes_states, index_exists, put_index, put_indexes_states, }; +use super::ListIndexesQuery; use crate::checkpoint::IndexCheckpointDelta; use crate::{ IndexMetadata, ListSplitsQuery, Metastore, MetastoreError, MetastoreResult, Split, @@ -163,8 +165,8 @@ impl FileBackedMetastore { let index_id = index_uid.index_id(); let mut locked_index = self.get_locked_index(index_id).await?; if locked_index.index_uid() != index_uid { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }); } let mut index = locked_index.clone(); @@ -212,8 +214,8 @@ impl FileBackedMetastore { if locked_index.index_uid() == index_uid { view(&locked_index) } else { - Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + Err(MetastoreError::IndexesDoNotExist { + 
index_ids: vec![index_id.to_string()], }) } } @@ -377,8 +379,8 @@ impl Metastore for FileBackedMetastore { if !per_index_metastores_wlock.contains_key(index_id) && !index_exists(&*self.storage, index_id).await? { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }); } @@ -402,7 +404,7 @@ impl Metastore for FileBackedMetastore { Ok(()) | // If the index file does not exist, we still need to return an error, // but it makes sense to ensure that the index state is removed. - Err(MetastoreError::IndexDoesNotExist { .. }) => { + Err(MetastoreError::IndexesDoNotExist { .. }) => { per_index_metastores_wlock.remove(index_id); if let Err(error) = put_indexes_states(&*self.storage, &per_index_metastores_wlock).await { per_index_metastores_wlock.insert(index_id.to_string(), IndexState::Deleting); @@ -546,9 +548,14 @@ impl Metastore for FileBackedMetastore { /// Read-only accessors async fn list_splits(&self, query: ListSplitsQuery) -> MetastoreResult> { - let query_clone = query.clone(); - self.read(query.index_uid, |index| index.list_splits(query_clone)) - .await + let mut splits = Vec::new(); + for index_uid in query.index_uids.iter() { + let index_splits = self + .read(index_uid.clone(), |index| index.list_splits(query.clone())) + .await?; + splits.extend(index_splits); + } + Ok(splits) } async fn index_metadata(&self, index_id: &str) -> MetastoreResult { @@ -556,12 +563,26 @@ impl Metastore for FileBackedMetastore { .await } - async fn list_indexes_metadatas(&self) -> MetastoreResult> { + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { // Done in two steps: // 1) Get index IDs and release the lock on `per_index_metastores`. // 2) Get each index metadata. Note that each get will take a read lock on // `per_index_metastores`. 
Lock is released in 1) to let a concurrent task/thread to // take a write lock on `per_index_metastores`. + let index_matcher = match query { + ListIndexesQuery::IndexIdPatterns(patterns) => { + IndexIdPatternsMatcher::try_from_patterns(patterns).map_err(|error| { + MetastoreError::InternalError { + message: "Failed to build `IndexIdPatternsMatcher`".to_string(), + cause: error.to_string(), + } + })? + } + ListIndexesQuery::All => IndexIdPatternsMatcher::all(), + }; let index_ids: Vec = { let per_index_metastores_rlock = self.per_index_metastores.read().await; per_index_metastores_rlock @@ -570,6 +591,7 @@ impl Metastore for FileBackedMetastore { IndexState::Alive(_) => Some(index_id), _ => None, }) + .filter(|index_id| index_matcher.matches(index_id)) .cloned() .collect() }; @@ -577,7 +599,7 @@ impl Metastore for FileBackedMetastore { try_join_all(index_ids.iter().map(|index_id| async move { match self.index_metadata(index_id).await { Ok(index_metadata) => Ok(Some(index_metadata)), - Err(MetastoreError::IndexDoesNotExist { index_id: _ }) => Ok(None), + Err(MetastoreError::IndexesDoNotExist { index_ids: _ }) => Ok(None), Err(MetastoreError::InternalError { message, cause }) => { // Indexes can be in a transition state `Creating` or `Deleting`. // This is fine to ignore them. @@ -676,6 +698,76 @@ async fn get_index_mutex( } } +/// Index ID patterns matcher which matches one of the given patterns with the following +/// rules: +/// - If the given pattern does not contain a `*` char, it matches the exact pattern. +/// - If the given pattern contains one or more `*`, it matches the regex built from a regex where +/// `*` is replaced by `.*`. All other regular expression meta characters are escaped. 
+struct IndexIdPatternsMatcher { + match_all: bool, + index_ids: Vec<String>, + regexes: Vec<Regex>, +} + +impl IndexIdPatternsMatcher { + fn all() -> Self { + Self { + match_all: true, + index_ids: Vec::new(), + regexes: Vec::new(), + } + } + + fn try_from_patterns(patterns: Vec<String>) -> anyhow::Result<Self> { + let mut index_ids = Vec::new(); + let mut regexes = Vec::new(); + // If there is a match all pattern, no need to go further. + if patterns.iter().any(|pattern| pattern == "*") { + return Ok(Self::all()); + } + for index_pattern in patterns { + if index_pattern.contains('*') { + let regex = build_regex_from_pattern(&index_pattern)?; + regexes.push(regex); + } else { + index_ids.push(index_pattern); + } + } + Ok(Self { + match_all: false, + index_ids, + regexes, + }) + } + + fn matches(&self, index_id: &str) -> bool { + if self.match_all { + return true; + } + + self.index_ids.iter().any(|x| x == index_id) + || self.regexes.iter().any(|regex| regex.is_match(index_id)) + } +} + +/// Convert the tokens into a valid regex. +fn build_regex_from_pattern(index_pattern: &str) -> anyhow::Result<Regex> { + // Note: consecutive '*' are not allowed in the pattern. 
+ validate_index_id_pattern(index_pattern)?; + let mut re: String = String::new(); + re.push('^'); + for tok in index_pattern.chars() { + if tok == '*' { + re.push_str(".*"); + } else { + re.push_str(&regex::escape(&tok.to_string())); + } + } + re.push('$'); + let regex = Regex::new(&re).expect("Regex compilation shouldn't fail"); + Ok(regex) +} + #[cfg(test)] #[async_trait] impl crate::tests::test_suite::DefaultForTest for FileBackedMetastore { @@ -699,7 +791,7 @@ mod tests { use futures::executor::block_on; use quickwit_config::IndexConfig; use quickwit_proto::metastore::DeleteQuery; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use quickwit_storage::{MockStorage, RamStorage, Storage, StorageErrorKind}; use rand::Rng; use time::OffsetDateTime; @@ -768,7 +860,10 @@ mod tests { ); // Check index is returned by list indexes. - let indexes = metastore.list_indexes_metadatas().await.unwrap(); + let indexes = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); assert_eq!(indexes.len(), 1); // Open a non-existent index. @@ -778,7 +873,7 @@ mod tests { .unwrap_err(); assert!(matches!( metastore_error, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); // Open a index with a different incarnation_id. @@ -788,7 +883,7 @@ mod tests { .unwrap_err(); assert!(matches!( metastore_error, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); } @@ -1064,7 +1159,10 @@ mod tests { { let metastore = metastore.clone(); let handle = tokio::spawn(async move { - metastore.list_indexes_metadatas().await.unwrap(); + metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); }); handles.push(handle); } @@ -1120,7 +1218,7 @@ mod tests { .unwrap_err(); assert!(matches!( created_index_error, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. 
} )); } @@ -1182,7 +1280,7 @@ mod tests { let deleted_index_error = metastore.delete_index(index_uid.clone()).await.unwrap_err(); assert!(matches!( deleted_index_error, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); let index_states = fetch_or_init_indexes_states(Arc::new(ram_storage_clone_2), None) .await @@ -1192,7 +1290,7 @@ mod tests { let created_index_error = metastore.get_index(index_uid).await.unwrap_err(); assert!(matches!( created_index_error, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); } @@ -1392,7 +1490,10 @@ mod tests { let metastore = FileBackedMetastore::try_new(ram_storage.clone(), None) .await .unwrap(); - let indexes_metadatas = metastore.list_indexes_metadatas().await.unwrap(); + let indexes_metadatas = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); assert_eq!(indexes_metadatas.len(), 1); // Fetch the index metadata not registered in indexes states json. @@ -1403,7 +1504,10 @@ mod tests { // Now list indexes return 2 indexes metadatas as the metastore is now aware of // 2 alive indexes. - let indexes_metadatas = metastore.list_indexes_metadatas().await.unwrap(); + let indexes_metadatas = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); assert_eq!(indexes_metadatas.len(), 2); // Let's delete indexes. 
@@ -1412,7 +1516,10 @@ mod tests { .delete_index(index_uid_unregistered) .await .unwrap(); - let no_more_indexes = metastore.list_indexes_metadatas().await.unwrap(); + let no_more_indexes = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); assert!(no_more_indexes.is_empty()); Ok(()) @@ -1434,7 +1541,7 @@ mod tests { start_timestamp: None, end_timestamp: None, index_uid: index_uid.to_string(), - query_ast: qast_helper("harry potter", &["body"]), + query_ast: qast_string_helper("harry potter", &["body"]), }; let delete_task_1 = metastore @@ -1469,7 +1576,7 @@ mod tests { start_timestamp: None, end_timestamp: None, index_uid: index_uid.to_string(), - query_ast: qast_helper("harry potter", &["body"]), + query_ast: qast_string_helper("harry potter", &["body"]), }; let delete_task_4 = metastore .create_delete_task(delete_query.clone()) @@ -1477,4 +1584,61 @@ mod tests { .unwrap(); assert_eq!(delete_task_4.opstamp, 1); } + #[test] + fn test_build_regexes_from_pattern() { + assert_eq!(build_regex_from_pattern("*").unwrap().to_string(), r"^.*$",); + assert_eq!( + build_regex_from_pattern("index-1").unwrap().to_string(), + r"^index\-1$", + ); + assert_eq!( + build_regex_from_pattern("*-index-*-1").unwrap().to_string(), + r"^.*\-index\-.*\-1$", + ); + assert_eq!( + build_regex_from_pattern("INDEX.2*-1").unwrap().to_string(), + r"^INDEX\.2.*\-1$", + ); + // test with invalid pattern + assert_eq!( + &build_regex_from_pattern("index-**-1") + .unwrap_err() + .to_string(), + "Index ID pattern `index-**-1` is invalid. 
Patterns must not contain `**`.", + ); + assert!(build_regex_from_pattern("-index-1").is_err()); + } + + #[test] + fn test_index_ids_patterns_matcher() { + { + let matcher = IndexIdPatternsMatcher::try_from_patterns(vec![ + "index-1".to_string(), + "index-2".to_string(), + "*-index-pattern-1-*".to_string(), + "*.index.pattern.*.2-*".to_string(), + ]) + .unwrap(); + + assert!(matcher.matches("index-1")); + assert!(matcher.matches("index-2")); + assert!(matcher.matches("abc-index-pattern-1-1")); + assert!(matcher.matches("def-index-pattern-1-2")); + assert!(matcher.matches("ghi.index.pattern.1.2-1")); + assert!(matcher.matches("jkl.index.pattern.1.2-bignumber")); + assert!(!matcher.matches("index-3")); + assert!(!matcher.matches("index.pattern.1.2-1")); + } + { + let matcher = IndexIdPatternsMatcher::try_from_patterns(vec![ + "index-1".to_string(), + "*".to_string(), + ]) + .unwrap(); + + assert!(matcher.matches("index-1")); + assert!(matcher.matches("index-2")); + assert!(matcher.matches("abc-index-pattern-1-1")); + } + } } diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/store_operations.rs b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/store_operations.rs index 403db2092d0..c67ebb327c6 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/store_operations.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed_metastore/store_operations.rs @@ -60,8 +60,8 @@ pub(crate) fn meta_path(index_id: &str) -> PathBuf { fn convert_error(index_id: &str, storage_err: StorageError) -> MetastoreError { match storage_err.kind() { - StorageErrorKind::NotFound => MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + StorageErrorKind::NotFound => MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }, StorageErrorKind::Unauthorized => MetastoreError::Forbidden { message: "The request credentials do not allow for this operation.".to_string(), @@ -234,8 +234,8 @@ 
pub(crate) async fn delete_index(storage: &dyn Storage, index_id: &str) -> Metas .map_err(|storage_err| convert_error(index_id, storage_err))?; if !file_exists { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }); } diff --git a/quickwit/quickwit-metastore/src/metastore/grpc_metastore/grpc_adapter.rs b/quickwit/quickwit-metastore/src/metastore/grpc_metastore/grpc_adapter.rs index da1c4c10427..b02392c2e1b 100644 --- a/quickwit/quickwit-metastore/src/metastore/grpc_metastore/grpc_adapter.rs +++ b/quickwit/quickwit-metastore/src/metastore/grpc_metastore/grpc_adapter.rs @@ -103,7 +103,13 @@ impl grpc::MetastoreService for GrpcMetastoreAdapter { request: tonic::Request, ) -> Result, tonic::Status> { set_parent_span_from_request_metadata(request.metadata()); - let indexes_metadatas = self.0.list_indexes_metadatas().await?; + let query = serde_json::from_str(&request.into_inner().filter_json).map_err(|error| { + MetastoreError::JsonSerializeError { + struct_name: "ListIndexesQuery".to_string(), + message: error.to_string(), + } + })?; + let indexes_metadatas = self.0.list_indexes_metadatas(query).await?; let list_indexes_metadatas_reply = serde_json::to_string(&indexes_metadatas) .map( |indexes_metadatas_serialized_json| ListIndexesMetadatasResponse { diff --git a/quickwit/quickwit-metastore/src/metastore/grpc_metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/grpc_metastore/mod.rs index 59b0c9a573a..b1ae60e9e31 100644 --- a/quickwit/quickwit-metastore/src/metastore/grpc_metastore/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/grpc_metastore/mod.rs @@ -45,8 +45,8 @@ use tower::timeout::error::Elapsed; use crate::checkpoint::IndexCheckpointDelta; use crate::{ - IndexMetadata, ListSplitsQuery, Metastore, MetastoreError, MetastoreResult, Split, - SplitMetadata, + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, 
MetastoreError, MetastoreResult, + Split, SplitMetadata, }; // URI describing in a generic way the metastore services resource present in the cluster (= @@ -159,11 +159,20 @@ impl Metastore for MetastoreGrpcClient { } /// List indexes. - async fn list_indexes_metadatas(&self) -> MetastoreResult> { + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { + let filter_json = serde_json::to_string(&query).map_err(|error| { + MetastoreError::JsonDeserializeError { + struct_name: "ListIndexesQuery".to_string(), + message: error.to_string(), + } + })?; let response = self .underlying .clone() - .list_indexes_metadatas(ListIndexesMetadatasRequest {}) + .list_indexes_metadatas(ListIndexesMetadatasRequest { filter_json }) .await .map_err(|tonic_error| parse_grpc_error(&tonic_error))?; let indexes_metadatas = diff --git a/quickwit/quickwit-metastore/src/metastore/instrumented_metastore.rs b/quickwit/quickwit-metastore/src/metastore/instrumented_metastore.rs index 5f366c176f6..1ba72faf11e 100644 --- a/quickwit/quickwit-metastore/src/metastore/instrumented_metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/instrumented_metastore.rs @@ -18,13 +18,17 @@ // along with this program. If not, see . use async_trait::async_trait; +use itertools::Itertools; use quickwit_common::uri::Uri; use quickwit_config::{IndexConfig, SourceConfig}; use quickwit_proto::metastore::{DeleteQuery, DeleteTask}; use quickwit_proto::IndexUid; use crate::checkpoint::IndexCheckpointDelta; -use crate::{IndexMetadata, ListSplitsQuery, Metastore, MetastoreResult, Split, SplitMetadata}; +use crate::{ + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreResult, Split, + SplitMetadata, +}; macro_rules! 
instrument { ($expr:expr, [$operation:ident, $($label:expr),*]) => { @@ -106,9 +110,12 @@ impl Metastore for InstrumentedMetastore { ); } - async fn list_indexes_metadatas(&self) -> MetastoreResult> { + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { instrument!( - self.underlying.list_indexes_metadatas().await, + self.underlying.list_indexes_metadatas(query).await, [list_indexes_metadatas, ""] ); } @@ -156,9 +163,15 @@ impl Metastore for InstrumentedMetastore { } async fn list_splits(&self, query: ListSplitsQuery) -> MetastoreResult> { + let index_uids = query + .index_uids + .iter() + .map(|index_uid| index_uid.to_string()) + .collect_vec() + .join(","); instrument!( self.underlying.list_splits(query.clone()).await, - [list_splits, query.index_uid.index_id()] + [list_splits, &index_uids] ); } diff --git a/quickwit/quickwit-metastore/src/metastore/metastore_event_publisher.rs b/quickwit/quickwit-metastore/src/metastore/metastore_event_publisher.rs index c9cf8afe2eb..fc0f079f2cd 100644 --- a/quickwit/quickwit-metastore/src/metastore/metastore_event_publisher.rs +++ b/quickwit/quickwit-metastore/src/metastore/metastore_event_publisher.rs @@ -29,7 +29,10 @@ use quickwit_proto::IndexUid; use tracing::info; use crate::checkpoint::IndexCheckpointDelta; -use crate::{IndexMetadata, ListSplitsQuery, Metastore, MetastoreResult, Split, SplitMetadata}; +use crate::{ + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreResult, Split, + SplitMetadata, +}; /// Metastore events dispatched to subscribers. 
#[derive(Debug, Clone, Eq, PartialEq)] @@ -113,8 +116,11 @@ impl Metastore for MetastoreEventPublisher { self.underlying.index_metadata(index_id).await } - async fn list_indexes_metadatas(&self) -> MetastoreResult> { - self.underlying.list_indexes_metadatas().await + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { + self.underlying.list_indexes_metadatas(query).await } async fn delete_index(&self, index_uid: IndexUid) -> MetastoreResult<()> { diff --git a/quickwit/quickwit-metastore/src/metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/mod.rs index 773575d4724..33e7c9c9f24 100644 --- a/quickwit/quickwit-metastore/src/metastore/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/mod.rs @@ -109,7 +109,7 @@ pub trait Metastore: Send + Sync + 'static { async fn index_exists(&self, index_id: &str) -> MetastoreResult { match self.index_metadata(index_id).await { Ok(_) => Ok(true), - Err(MetastoreError::IndexDoesNotExist { .. }) => Ok(false), + Err(MetastoreError::IndexesDoNotExist { .. }) => Ok(false), Err(error) => Err(error), } } @@ -129,8 +129,8 @@ pub trait Metastore: Send + Sync + 'static { let index_metadata = self.index_metadata(index_uid.index_id()).await?; if index_metadata.index_uid != *index_uid { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } Ok(index_metadata) @@ -140,7 +140,10 @@ pub trait Metastore: Send + Sync + 'static { /// /// This API lists the indexes stored in the metastore and returns a collection of /// [`IndexMetadata`]. - async fn list_indexes_metadatas(&self) -> MetastoreResult>; + async fn list_indexes_metadatas( + &self, + list_indexes_query: ListIndexesQuery, + ) -> MetastoreResult>; /// Deletes an index. 
/// @@ -309,11 +312,21 @@ pub trait Metastore: Send + Sync + 'static { ) -> MetastoreResult>; } +/// Query builder for listing index metadatas. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum ListIndexesQuery { + /// List of index ID pattern. + /// A pattern can contain the wildcard character `*`. + IndexIdPatterns(Vec), + /// Match all indexes + All, +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] /// A query builder for listing splits within the metastore. pub struct ListSplitsQuery { /// The index to get splits from. - pub index_uid: IndexUid, + pub index_uids: Vec, /// The maximum number of splits to retrieve. pub limit: Option, @@ -348,7 +361,7 @@ impl ListSplitsQuery { /// Creates a new [ListSplitsQuery] for a specific index. pub fn for_index(index_uid: IndexUid) -> Self { Self { - index_uid, + index_uids: vec![index_uid], limit: None, offset: None, split_states: Vec::new(), @@ -361,6 +374,29 @@ impl ListSplitsQuery { } } + /// Creates a new [ListSplitsQuery] from a non-empty list of index Uids. + /// Returns an error if the list of index uids is empty. + pub fn for_indexes(index_uids: Vec) -> MetastoreResult { + if index_uids.is_empty() { + return Err(MetastoreError::InternalError { + message: "ListSplitQuery should define at least one index uid.".to_string(), + cause: "".to_string(), + }); + } + Ok(Self { + index_uids, + limit: None, + offset: None, + split_states: Vec::new(), + tags: None, + time_range: Default::default(), + delete_opstamp: Default::default(), + update_timestamp: Default::default(), + create_timestamp: Default::default(), + mature: Bound::Unbounded, + }) + } + /// Sets the maximum number of splits to retrieve. 
pub fn with_limit(mut self, n: usize) -> Self { self.limit = Some(n); diff --git a/quickwit/quickwit-metastore/src/metastore/postgresql_metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgresql_metastore.rs index 7a6ae58379d..3c5f11890cb 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgresql_metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgresql_metastore.rs @@ -17,7 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Write}; use std::ops::Bound; use std::str::FromStr; @@ -29,7 +29,8 @@ use itertools::Itertools; use quickwit_common::uri::Uri; use quickwit_common::PrettySample; use quickwit_config::{ - IndexConfig, MetastoreBackend, MetastoreConfig, PostgresMetastoreConfig, SourceConfig, + validate_index_id_pattern, IndexConfig, MetastoreBackend, MetastoreConfig, + PostgresMetastoreConfig, SourceConfig, }; use quickwit_doc_mapper::tag_pruning::TagFilterAst; use quickwit_proto::metastore::{DeleteQuery, DeleteTask}; @@ -48,7 +49,7 @@ use crate::metastore::postgresql_model::{ }; use crate::metastore::FilterRange; use crate::{ - IndexMetadata, ListSplitsQuery, Metastore, MetastoreError, MetastoreFactory, + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreError, MetastoreFactory, MetastoreResolverError, MetastoreResult, Split, SplitMaturity, SplitMetadata, SplitState, }; @@ -192,8 +193,8 @@ async fn index_metadata( ) -> MetastoreResult { index_opt(tx.as_mut(), index_id) .await? - .ok_or_else(|| MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + .ok_or_else(|| MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], })? 
.index_metadata() } @@ -229,7 +230,17 @@ fn write_sql_filter( } fn build_query_filter(mut sql: String, query: &ListSplitsQuery) -> String { - sql.push_str(" WHERE index_uid = $1"); + // ListSplitsQuery cannot be built on an empty index list. + assert!(!query.index_uids.is_empty()); + sql.push_str(" WHERE ("); + for (idx, index_uid) in query.index_uids.iter().enumerate() { + let _ = write!(sql, "index_uid = '{}'", index_uid); + + if idx < query.index_uids.len() - 1 { + sql.push_str(" OR "); + } + } + sql.push(')'); if !query.split_states.is_empty() { let params = query @@ -342,8 +353,8 @@ fn convert_sqlx_err(index_id: &str, sqlx_err: sqlx::Error) -> MetastoreError { let pg_error_table = pg_db_error.table(); match (pg_error_code, pg_error_table) { - (pg_error_code::FOREIGN_KEY_VIOLATION, _) => MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + (pg_error_code::FOREIGN_KEY_VIOLATION, _) => MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }, (pg_error_code::UNIQUE_VIOLATION, Some(table)) if table.starts_with("indexes") => { MetastoreError::IndexAlreadyExists { @@ -410,8 +421,8 @@ where let index_id = index_uid.index_id(); let mut index_metadata = index_metadata(tx, index_id).await?; if index_metadata.index_uid != index_uid { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }); } let mutation_occurred = mutate_fn(&mut index_metadata)?; @@ -436,8 +447,8 @@ where .execute(tx.as_mut()) .await?; if update_index_res.rows_affected() == 0 { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], }); } Ok(mutation_occurred) @@ -451,8 +462,22 @@ impl Metastore for PostgresqlMetastore { } #[instrument(skip(self))] - async fn list_indexes_metadatas(&self) -> MetastoreResult> { - let pg_indexes = 
sqlx::query_as::<_, PgIndex>("SELECT * FROM indexes") + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { + let sql = match query { + ListIndexesQuery::All => "SELECT * FROM indexes".to_string(), + ListIndexesQuery::IndexIdPatterns(index_id_patterns) => { + build_index_id_patterns_sql_query(index_id_patterns).map_err(|error| { + MetastoreError::InternalError { + message: "Failed to build `list_indexes_metadatas` SQL query".to_string(), + cause: error.to_string(), + } + })? + } + }; + let pg_indexes = sqlx::query_as::<_, PgIndex>(&sql) .fetch_all(&self.connection_pool) .await?; pg_indexes @@ -489,8 +514,8 @@ impl Metastore for PostgresqlMetastore { .execute(&self.connection_pool) .await?; if delete_res.rows_affected() == 0 { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } Ok(()) @@ -607,8 +632,8 @@ impl Metastore for PostgresqlMetastore { run_with_tx!(self.connection_pool, tx, { let mut index_metadata = index_metadata(tx, index_uid.index_id()).await?; if index_metadata.index_uid != index_uid { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } if let Some(checkpoint_delta) = checkpoint_delta_opt { @@ -729,25 +754,40 @@ impl Metastore for PostgresqlMetastore { }) } - #[instrument(skip(self), fields(index_id=query.index_uid.index_id()))] + #[instrument(skip(self), fields(index_uids=query.index_uids.iter().join(",")))] async fn list_splits(&self, query: ListSplitsQuery) -> MetastoreResult> { let sql_base = "SELECT * FROM splits".to_string(); let sql = build_query_filter(sql_base, &query); let pg_splits = sqlx::query_as::<_, PgSplit>(&sql) - .bind(query.index_uid.to_string()) .fetch_all(&self.connection_pool) .await?; - // 
If no splits were returned, maybe the index does not exist in the first place? - if pg_splits.is_empty() - && index_opt_for_uid(&self.connection_pool, query.index_uid.clone()) + // If no splits were returned, maybe some indexes do not exist in the first place? + // TODO: the file-backed metastore is more accurate as it checks for index existence before + // returning splits. We could do the same here or remove index existence check `list_splits` + // for all metastore implementations. + if pg_splits.is_empty() { + let index_ids_str = query + .index_uids + .iter() + .map(|index_uid| index_uid.index_id().to_string()) + .collect_vec(); + let found_index_ids: HashSet = self + .list_indexes_metadatas(ListIndexesQuery::IndexIdPatterns(index_ids_str.clone())) .await? - .is_none() - { - return Err(MetastoreError::IndexDoesNotExist { - index_id: query.index_uid.index_id().to_string(), - }); + .into_iter() + .map(|index_metadata| index_metadata.index_id().to_string()) + .collect(); + let missing_index_ids = index_ids_str + .into_iter() + .filter(|index_id| !found_index_ids.contains(index_id)) + .collect_vec(); + if !missing_index_ids.is_empty() { + return Err(MetastoreError::IndexesDoNotExist { + index_ids: missing_index_ids, + }); + } } pg_splits .into_iter() @@ -809,8 +849,8 @@ impl Metastore for PostgresqlMetastore { .await? .is_none() { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } info!( @@ -891,8 +931,8 @@ impl Metastore for PostgresqlMetastore { .await? 
.is_none() { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } if !not_deletable_split_ids.is_empty() { @@ -917,8 +957,8 @@ impl Metastore for PostgresqlMetastore { async fn index_metadata(&self, index_id: &str) -> MetastoreResult { index_opt(&self.connection_pool, index_id) .await? - .ok_or_else(|| MetastoreError::IndexDoesNotExist { - index_id: index_id.to_string(), + .ok_or_else(|| MetastoreError::IndexesDoNotExist { + index_ids: vec![index_id.to_string()], })? .index_metadata() } @@ -1077,8 +1117,8 @@ impl Metastore for PostgresqlMetastore { .await? .is_none() { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } Ok(()) @@ -1145,8 +1185,8 @@ impl Metastore for PostgresqlMetastore { .await? .is_none() { - return Err(MetastoreError::IndexDoesNotExist { - index_id: index_uid.index_id().to_string(), + return Err(MetastoreError::IndexesDoNotExist { + index_ids: vec![index_uid.index_id().to_string()], }); } pg_stale_splits @@ -1212,6 +1252,31 @@ fn tags_filter_expression_helper(tags: &TagFilterAst) -> String { } } +/// Builds an SQL query that returns indexes which match at least one pattern in +/// `index_id_patterns`. For each pattern, we check if the pattern is valid and replace `*` by `%` +/// to build a SQL `LIKE` query. 
+fn build_index_id_patterns_sql_query(index_id_patterns: Vec) -> anyhow::Result { + assert!(!index_id_patterns.is_empty()); + if index_id_patterns.iter().any(|pattern| pattern == "*") { + return Ok("SELECT * FROM indexes".to_string()); + } + let mut like_or_query: String = String::new(); + for (index_id_pattern_idx, index_id_pattern) in index_id_patterns.iter().enumerate() { + validate_index_id_pattern(index_id_pattern).map_err(|error| { + MetastoreError::InternalError { + message: "Failed to build list indexes query".to_string(), + cause: error.to_string(), + } + })?; + let sql_pattern = index_id_pattern.replace('*', "%"); + let _ = write!(like_or_query, "index_id LIKE '{sql_pattern}'"); + if index_id_pattern_idx < index_id_patterns.len() - 1 { + like_or_query.push_str(" OR "); + } + } + Ok(format!("SELECT * FROM indexes WHERE {like_or_query}")) +} + /// A postgres metastore factory #[derive(Clone, Default)] pub struct PostgresqlMetastoreFactory { @@ -1314,6 +1379,7 @@ mod tests { use time::OffsetDateTime; use super::{build_query_filter, tags_filter_expression_helper}; + use crate::metastore::postgresql_metastore::build_index_id_patterns_sql_query; use crate::{ListSplitsQuery, SplitState}; fn test_tags_filter_expression_helper(tags_ast: TagFilterAst, expected: &str) { @@ -1375,20 +1441,24 @@ mod tests { "$Quickwit!$tag:$$;DELETE FROM something_evil$Quickwit!$ = ANY(tags)", ); } + #[test] fn test_single_sql_query_builder() { let index_uid = IndexUid::new("test-index"); let query = ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Staged); let sql = build_query_filter(String::new(), &query); - assert_eq!(sql, " WHERE index_uid = $1 AND split_state IN ('Staged')"); + assert_eq!( + sql, + format!(" WHERE (index_uid = '{index_uid}') AND split_state IN ('Staged')") + ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Published); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE 
index_uid = $1 AND split_state IN ('Published')" + format!(" WHERE (index_uid = '{index_uid}') AND split_state IN ('Published')") ); let query = ListSplitsQuery::for_index(index_uid.clone()) @@ -1396,21 +1466,24 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND split_state IN ('Published', 'MarkedForDeletion')" + format!( + " WHERE (index_uid = '{index_uid}') AND split_state IN ('Published', \ + 'MarkedForDeletion')" + ) ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_update_timestamp_lt(51); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND update_timestamp < to_timestamp(51)" + format!(" WHERE (index_uid = '{index_uid}') AND update_timestamp < to_timestamp(51)") ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_create_timestamp_lte(55); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND create_timestamp <= to_timestamp(55)" + format!(" WHERE (index_uid = '{index_uid}') AND create_timestamp <= to_timestamp(55)") ); let maturity_evaluation_datetime = OffsetDateTime::from_unix_timestamp(55).unwrap(); @@ -1419,8 +1492,10 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND (maturity_timestamp = to_timestamp(0) OR to_timestamp(55) \ - >= maturity_timestamp)" + format!( + " WHERE (index_uid = '{index_uid}') AND (maturity_timestamp = to_timestamp(0) OR \ + to_timestamp(55) >= maturity_timestamp)" + ) ); let query = ListSplitsQuery::for_index(index_uid.clone()) @@ -1428,35 +1503,45 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND to_timestamp(55) < maturity_timestamp" + format!(" WHERE (index_uid = '{index_uid}') AND to_timestamp(55) < maturity_timestamp") ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_delete_opstamp_gte(4); let sql = 
build_query_filter(String::new(), &query); - assert_eq!(sql, " WHERE index_uid = $1 AND delete_opstamp >= 4"); + assert_eq!( + sql, + format!(" WHERE (index_uid = '{index_uid}') AND delete_opstamp >= 4") + ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_start_gt(45); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND (time_range_end > 45 OR time_range_end IS NULL)" + format!( + " WHERE (index_uid = '{index_uid}') AND (time_range_end > 45 OR time_range_end IS \ + NULL)" + ) ); let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_end_lt(45); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND (time_range_start < 45 OR time_range_start IS NULL)" + format!( + " WHERE (index_uid = '{index_uid}') AND (time_range_start < 45 OR \ + time_range_start IS NULL)" + ) ); - let query = ListSplitsQuery::for_index(index_uid).with_tags_filter(TagFilterAst::Tag { - is_present: false, - tag: "tag-2".to_string(), - }); + let query = + ListSplitsQuery::for_index(index_uid.clone()).with_tags_filter(TagFilterAst::Tag { + is_present: false, + tag: "tag-2".to_string(), + }); let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND (NOT ($$tag-2$$ = ANY(tags)))" + format!(" WHERE (index_uid = '{index_uid}') AND (NOT ($$tag-2$$ = ANY(tags)))") ); } @@ -1469,8 +1554,10 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND (time_range_end > 0 OR time_range_end IS NULL) AND \ - (time_range_start < 40 OR time_range_start IS NULL)" + format!( + " WHERE (index_uid = '{index_uid}') AND (time_range_end > 0 OR time_range_end IS \ + NULL) AND (time_range_start < 40 OR time_range_start IS NULL)" + ) ); let query = ListSplitsQuery::for_index(index_uid.clone()) @@ -1479,8 +1566,10 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( 
sql, - " WHERE index_uid = $1 AND (time_range_end > 45 OR time_range_end IS NULL) AND \ - delete_opstamp > 0" + format!( + " WHERE (index_uid = '{index_uid}') AND (time_range_end > 45 OR time_range_end IS \ + NULL) AND delete_opstamp > 0" + ) ); let query = ListSplitsQuery::for_index(index_uid.clone()) @@ -1489,11 +1578,13 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND update_timestamp < to_timestamp(51) AND create_timestamp \ - <= to_timestamp(63)" + format!( + " WHERE (index_uid = '{index_uid}') AND update_timestamp < to_timestamp(51) AND \ + create_timestamp <= to_timestamp(63)" + ) ); - let query = ListSplitsQuery::for_index(index_uid) + let query = ListSplitsQuery::for_index(index_uid.clone()) .with_time_range_start_gt(90) .with_tags_filter(TagFilterAst::Tag { is_present: true, @@ -1502,8 +1593,53 @@ mod tests { let sql = build_query_filter(String::new(), &query); assert_eq!( sql, - " WHERE index_uid = $1 AND ($$tag-1$$ = ANY(tags)) AND (time_range_end > 90 OR \ - time_range_end IS NULL)" + format!( + " WHERE (index_uid = '{index_uid}') AND ($$tag-1$$ = ANY(tags)) AND \ + (time_range_end > 90 OR time_range_end IS NULL)" + ) + ); + + let index_uid_2 = IndexUid::new("test-index-2"); + let query = + ListSplitsQuery::for_indexes(vec![index_uid.clone(), index_uid_2.clone()]).unwrap(); + let sql = build_query_filter(String::new(), &query); + assert_eq!( + sql, + format!(" WHERE (index_uid = '{index_uid}' OR index_uid = '{index_uid_2}')") + ); + } + + #[test] + fn test_index_id_pattern_like_query() { + assert_eq!( + &build_index_id_patterns_sql_query(vec!["*-index-*-last*".to_string()]).unwrap(), + "SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%'" + ); + assert_eq!( + &build_index_id_patterns_sql_query(vec![ + "*-index-*-last*".to_string(), + "another-index".to_string() + ]) + .unwrap(), + "SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%' OR index_id LIKE \ + 'another-index'" + 
); + assert_eq!( + &build_index_id_patterns_sql_query(vec![ + "*-index-*-last**".to_string(), + "another-index".to_string(), + "*".to_string() + ]) + .unwrap(), + "SELECT * FROM indexes" + ); + assert_eq!( + build_index_id_patterns_sql_query(vec!["*-index-*-&-last**".to_string()]) + .unwrap_err() + .to_string(), + "Internal error: `Failed to build list indexes query` Cause: `Index ID pattern \ + `*-index-*-&-last**` is invalid. Patterns must match the following regular \ + expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{0,254}$`.`." ); } } diff --git a/quickwit/quickwit-metastore/src/metastore/retrying_metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/retrying_metastore/mod.rs index 7c99545ec25..d4f0d66cf6e 100644 --- a/quickwit/quickwit-metastore/src/metastore/retrying_metastore/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/retrying_metastore/mod.rs @@ -30,7 +30,10 @@ use quickwit_proto::IndexUid; use self::retry::{retry, RetryParams}; use crate::checkpoint::IndexCheckpointDelta; -use crate::{IndexMetadata, ListSplitsQuery, Metastore, MetastoreResult, Split, SplitMetadata}; +use crate::{ + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreResult, Split, + SplitMetadata, +}; /// Retry layer for a [`Metastore`]. /// This is a band-aid solution for now. 
This will be removed after retry can be usable on @@ -85,9 +88,12 @@ impl Metastore for RetryingMetastore { .await } - async fn list_indexes_metadatas(&self) -> MetastoreResult> { + async fn list_indexes_metadatas( + &self, + query: ListIndexesQuery, + ) -> MetastoreResult> { retry(&self.retry_params, || async { - self.inner.list_indexes_metadatas().await + self.inner.list_indexes_metadatas(query.clone()).await }) .await } diff --git a/quickwit/quickwit-metastore/src/metastore/retrying_metastore/test.rs b/quickwit/quickwit-metastore/src/metastore/retrying_metastore/test.rs index 857dfb5c553..13eee80f7a7 100644 --- a/quickwit/quickwit-metastore/src/metastore/retrying_metastore/test.rs +++ b/quickwit/quickwit-metastore/src/metastore/retrying_metastore/test.rs @@ -28,8 +28,8 @@ use quickwit_proto::IndexUid; use super::retry::RetryParams; use crate::checkpoint::IndexCheckpointDelta; use crate::{ - IndexMetadata, ListSplitsQuery, Metastore, MetastoreError, MetastoreResult, RetryingMetastore, - Split, SplitMetadata, + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreError, MetastoreResult, + RetryingMetastore, Split, SplitMetadata, }; struct RetryTestMetastore { @@ -97,7 +97,10 @@ impl Metastore for RetryTestMetastore { } } - async fn list_indexes_metadatas(&self) -> MetastoreResult> { + async fn list_indexes_metadatas( + &self, + _query: ListIndexesQuery, + ) -> MetastoreResult> { let result = self.try_success(); match result { Ok(_) => Ok(Vec::new()), @@ -241,17 +244,23 @@ async fn test_retryable_metastore_errors() { // On retryable errors, if max retry count is not achieved, RetryingMetastore should retry until // success - assert!(metastore.list_indexes_metadatas().await.is_ok()); + assert!(metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .is_ok()); let metastore: RetryingMetastore = RetryTestMetastore::new_retrying_with_errors( 5, - &[MetastoreError::IndexDoesNotExist { - index_id: "".to_string(), + 
&[MetastoreError::IndexesDoNotExist { + index_ids: vec!["".to_string()], }], ); // On non-retryable errors, RetryingMetastore should exit with an error. - assert!(metastore.list_indexes_metadatas().await.is_err()); + assert!(metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .is_err()); } #[tokio::test] @@ -267,7 +276,10 @@ async fn test_retryable_more_than_max_retry() { .collect::>(), ); - let error = metastore.list_indexes_metadatas().await.unwrap_err(); + let error = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap_err(); assert_eq!( error, MetastoreError::ConnectionError { @@ -299,7 +311,10 @@ async fn test_mixed_retryable_metastore_errors() { ], ); - let error = metastore.list_indexes_metadatas().await.unwrap_err(); + let error = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap_err(); assert_eq!( error, diff --git a/quickwit/quickwit-metastore/src/tests.rs b/quickwit/quickwit-metastore/src/tests.rs index 4a37343d06f..b89b2cb3a15 100644 --- a/quickwit/quickwit-metastore/src/tests.rs +++ b/quickwit/quickwit-metastore/src/tests.rs @@ -32,7 +32,7 @@ pub mod test_suite { use quickwit_doc_mapper::tag_pruning::{no_tag, tag, TagFilterAst}; use quickwit_proto::metastore::DeleteQuery; use quickwit_proto::IndexUid; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use time::OffsetDateTime; use tokio::time::sleep; use tracing::{error, info}; @@ -41,7 +41,8 @@ pub mod test_suite { IndexCheckpointDelta, PartitionId, Position, SourceCheckpoint, SourceCheckpointDelta, }; use crate::{ - ListSplitsQuery, Metastore, MetastoreError, Split, SplitMaturity, SplitMetadata, SplitState, + ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreError, Split, SplitMaturity, + SplitMetadata, SplitState, }; #[async_trait] @@ -161,7 +162,7 @@ pub mod test_suite { .index_metadata("index-not-found") .await .unwrap_err(); - assert!(matches!(error, 
MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); @@ -173,24 +174,24 @@ pub mod test_suite { cleanup_index(&metastore, index_uid).await; } - pub async fn test_metastore_list_indexes() { + pub async fn test_metastore_list_all_indexes() { let metastore = MetastoreToTest::default_for_test().await; - let index_id_suffix = append_random_suffix("test-list-indexes"); - let index_id_1 = format!("{index_id_suffix}-1"); + let index_id_prefix = append_random_suffix("test-list-all-indexes"); + let index_id_1 = format!("{index_id_prefix}-1"); let index_uri_1 = format!("ram:///indexes/{index_id_1}"); let index_config_1 = IndexConfig::for_test(&index_id_1, &index_uri_1); - let index_id_2 = format!("{index_id_suffix}-2"); + let index_id_2 = format!("{index_id_prefix}-2"); let index_uri_2 = format!("ram:///indexes/{index_id_2}"); let index_config_2 = IndexConfig::for_test(&index_id_2, &index_uri_2); let indexes_count = metastore - .list_indexes_metadatas() + .list_indexes_metadatas(ListIndexesQuery::All) .await .unwrap() .into_iter() - .filter(|index| index.index_id().starts_with(&index_id_suffix)) + .filter(|index| index.index_id().starts_with(&index_id_prefix)) .count(); assert_eq!(indexes_count, 0); @@ -198,11 +199,11 @@ pub mod test_suite { let index_uid_2 = metastore.create_index(index_config_2).await.unwrap(); let indexes_count = metastore - .list_indexes_metadatas() + .list_indexes_metadatas(ListIndexesQuery::All) .await .unwrap() .into_iter() - .filter(|index| index.index_id().starts_with(&index_id_suffix)) + .filter(|index| index.index_id().starts_with(&index_id_prefix)) .count(); assert_eq!(indexes_count, 2); @@ -210,6 +211,56 @@ pub mod test_suite { cleanup_index(&metastore, index_uid_2).await; } + pub async fn test_metastore_list_indexes() { + let metastore = MetastoreToTest::default_for_test().await; + + let index_id_fragment = 
append_random_suffix("test-list-indexes"); + let index_id_1 = format!("prefix-1-{index_id_fragment}-suffix-1"); + let index_uri_1 = format!("ram:///indexes/{index_id_1}"); + let index_config_1 = IndexConfig::for_test(&index_id_1, &index_uri_1); + + let index_id_2 = format!("prefix-2-{index_id_fragment}-suffix-2"); + let index_uri_2 = format!("ram:///indexes/{index_id_2}"); + let index_config_2 = IndexConfig::for_test(&index_id_2, &index_uri_2); + + let index_id_3 = format!("prefix.3.{index_id_fragment}.3"); + let index_uri_3 = format!("ram:///indexes/{index_id_3}"); + let index_config_3 = IndexConfig::for_test(&index_id_3, &index_uri_3); + + let index_id_4 = format!("p-4-{index_id_fragment}-suffix-4"); + let index_uri_4 = format!("ram:///indexes/{index_id_4}"); + let index_config_4 = IndexConfig::for_test(&index_id_4, &index_uri_4); + + let indexes_count = metastore + .list_indexes_metadatas(crate::ListIndexesQuery::IndexIdPatterns(vec![ + format!("prefix-*-{index_id_fragment}-suffix-*"), + format!("prefix*{index_id_fragment}*suffix-*"), + ])) + .await + .unwrap() + .len(); + assert_eq!(indexes_count, 0); + + let index_uid_1 = metastore.create_index(index_config_1).await.unwrap(); + let index_uid_2 = metastore.create_index(index_config_2).await.unwrap(); + let index_uid_3 = metastore.create_index(index_config_3).await.unwrap(); + let index_uid_4 = metastore.create_index(index_config_4).await.unwrap(); + + let indexes_count = metastore + .list_indexes_metadatas(crate::ListIndexesQuery::IndexIdPatterns(vec![format!( + "prefix-*-{index_id_fragment}-suffix-*" + )])) + .await + .unwrap() + .len(); + assert_eq!(indexes_count, 2); + + cleanup_index(&metastore, index_uid_1).await; + cleanup_index(&metastore, index_uid_2).await; + cleanup_index(&metastore, index_uid_3).await; + cleanup_index(&metastore, index_uid_4).await; + } + pub async fn test_metastore_delete_index() { let metastore = MetastoreToTest::default_for_test().await; @@ -221,13 +272,13 @@ pub mod test_suite { 
.delete_index(IndexUid::new("index-not-found")) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let error = metastore .delete_index(IndexUid::new("test-delete-index")) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); @@ -319,14 +370,14 @@ pub mod test_suite { .add_source(IndexUid::new("index-not-found"), source.clone()) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); assert!(matches!( metastore .add_source(IndexUid::new(index_id), source) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); cleanup_index(&metastore, index_uid).await; } @@ -404,14 +455,14 @@ pub mod test_suite { .add_source(IndexUid::new("index-not-found"), source.clone()) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); assert!(matches!( metastore .add_source(IndexUid::new(&index_id), source.clone()) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); metastore @@ -438,14 +489,14 @@ pub mod test_suite { .delete_source(IndexUid::new("index-not-found"), &source_id) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); assert!(matches!( metastore .delete_source(IndexUid::new(index_id), &source_id) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. 
} )); cleanup_index(&metastore, index_uid).await; @@ -520,7 +571,7 @@ pub mod test_suite { .reset_source_checkpoint(IndexUid::new("index-not-found"), &source_ids[1]) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); assert!(matches!( @@ -528,7 +579,7 @@ pub mod test_suite { .reset_source_checkpoint(IndexUid::new(&index_id), &source_ids[1]) .await .unwrap_err(), - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); metastore @@ -571,7 +622,7 @@ pub mod test_suite { ) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); } // Update the checkpoint, by publishing an empty array of splits with a non-empty @@ -655,7 +706,7 @@ pub mod test_suite { ) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); } // Publish a split on a wrong index uid @@ -673,7 +724,7 @@ pub mod test_suite { ) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); } // Publish a non-existent split on an index @@ -1150,7 +1201,7 @@ pub mod test_suite { ) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); } // Replace a non-existent split on an index @@ -1350,7 +1401,7 @@ pub mod test_suite { .mark_splits_for_deletion(IndexUid::new("index-not-found"), &[]) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. 
})); metastore .mark_splits_for_deletion(index_uid.clone(), &["split-not-found"]) @@ -1472,14 +1523,14 @@ pub mod test_suite { .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let error = metastore .delete_splits(IndexUid::new(&index_id), &[]) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); metastore .delete_splits(index_uid.clone(), &["split-not-found"]) @@ -1602,7 +1653,7 @@ pub mod test_suite { .list_all_splits(IndexUid::new("index-not-found")) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let index_uid = metastore.create_index(index_config).await.unwrap(); @@ -1725,7 +1776,7 @@ pub mod test_suite { let query = ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Staged); let error = metastore.list_splits(query).await.unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); } { let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); @@ -2186,7 +2237,7 @@ pub mod test_suite { let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); let delete_query = DeleteQuery { index_uid: index_uid.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; @@ -2199,7 +2250,7 @@ pub mod test_suite { }) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. 
})); // Create a delete task on an index with wrong incarnation_id let error = metastore @@ -2209,7 +2260,7 @@ pub mod test_suite { }) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); // Create a delete task. let delete_task_1 = metastore @@ -2249,13 +2300,13 @@ pub mod test_suite { let delete_query_index_1 = DeleteQuery { index_uid: index_uid_1.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; let delete_query_index_2 = DeleteQuery { index_uid: index_uid_2.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; @@ -2304,7 +2355,7 @@ pub mod test_suite { let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); let delete_query = DeleteQuery { index_uid: index_uid.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; @@ -2338,13 +2389,13 @@ pub mod test_suite { .unwrap(); let delete_query_index_1 = DeleteQuery { index_uid: index_uid_1.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; let delete_query_index_2 = DeleteQuery { index_uid: index_uid_2.to_string(), - query_ast: qast_helper("my_field:my_value", &[]), + query_ast: qast_string_helper("my_field:my_value", &[]), start_timestamp: Some(1), end_timestamp: Some(2), }; @@ -2439,7 +2490,8 @@ pub mod test_suite { .list_stale_splits(IndexUid::new("index-not-found"), 0, 10) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. 
})); + println!("{:?}", error); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); { info!("List stale splits on an index"); @@ -2566,7 +2618,7 @@ pub mod test_suite { error!(err=?metastore_err); assert!(matches!( metastore_err, - MetastoreError::IndexDoesNotExist { .. } + MetastoreError::IndexesDoNotExist { .. } )); } @@ -2652,7 +2704,7 @@ pub mod test_suite { ) .await .unwrap_err(); - assert!(matches!(error, MetastoreError::IndexDoesNotExist { .. })); + assert!(matches!(error, MetastoreError::IndexesDoNotExist { .. })); let index_uid = metastore.create_index(index_config.clone()).await.unwrap(); @@ -2756,6 +2808,12 @@ macro_rules! metastore_test_suite { crate::tests::test_suite::test_metastore_list_indexes::<$metastore_type>().await; } + #[tokio::test] + async fn test_metastore_list_all_indexes() { + let _ = tracing_subscriber::fmt::try_init(); + crate::tests::test_suite::test_metastore_list_all_indexes::<$metastore_type>().await; + } + #[tokio::test] async fn test_metastore_delete_index() { let _ = tracing_subscriber::fmt::try_init(); diff --git a/quickwit/quickwit-proto/protos/quickwit/metastore.proto b/quickwit/quickwit-proto/protos/quickwit/metastore.proto index 03c263b6571..a92269d1855 100644 --- a/quickwit/quickwit-proto/protos/quickwit/metastore.proto +++ b/quickwit/quickwit-proto/protos/quickwit/metastore.proto @@ -89,7 +89,9 @@ message CreateIndexResponse { string index_uid = 1; } -message ListIndexesMetadatasRequest {} +message ListIndexesMetadatasRequest { + string filter_json = 1; +} message ListIndexesMetadatasResponse { string indexes_metadatas_serialized_json = 1; diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index 55ade13b0b6..0375fbdef66 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -98,8 +98,8 @@ message GetKVResponse { // -- Search ------------------- message 
SearchRequest { - // Index ID - string index_id = 1; + // Index ID patterns + repeated string index_id_patterns = 1; // deprecated `query`` reserved 2; @@ -139,7 +139,7 @@ message SearchRequest { optional string aggregation_request = 11; // Fields to extract snippet on - repeated string snippet_fields = 12; + repeated string snippet_fields = 12; // Optional sort by one or more fields (limited to 2 at the moment). repeated SortField sort_fields = 14; @@ -330,9 +330,6 @@ message FetchDocsRequest { // Request fetching the content of a given list of partial_hits. repeated PartialHit partial_hits = 1; - // Index ID - string index_id = 2; - // Split footer offsets. They are required for fetch docs to // fetch the document content in two reads, when the footer is not // cached. diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.metastore.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.metastore.rs index a5af0981ea6..9768b2c8a95 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.metastore.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.metastore.rs @@ -15,7 +15,10 @@ pub struct CreateIndexResponse { #[derive(Serialize, Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListIndexesMetadatasRequest {} +pub struct ListIndexesMetadatasRequest { + #[prost(string, tag = "1")] + pub filter_json: ::prost::alloc::string::String, +} #[derive(Serialize, Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 536d59d69da..18cbeea386f 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -43,9 +43,9 @@ pub struct GetKvResponse { 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SearchRequest { - /// Index ID - #[prost(string, tag = "1")] - pub index_id: ::prost::alloc::string::String, + /// Index ID patterns + #[prost(string, repeated, tag = "1")] + pub index_id_patterns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, /// Json object representing Quickwit's QueryAst. #[prost(string, tag = "13")] pub query_ast: ::prost::alloc::string::String, @@ -308,9 +308,6 @@ pub struct FetchDocsRequest { /// Request fetching the content of a given list of partial_hits. #[prost(message, repeated, tag = "1")] pub partial_hits: ::prost::alloc::vec::Vec, - /// Index ID - #[prost(string, tag = "2")] - pub index_id: ::prost::alloc::string::String, /// Split footer offsets. They are required for fetch docs to /// fetch the document content in two reads, when the footer is not /// cached. diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs index 6ecded6122a..20dc01182a1 100644 --- a/quickwit/quickwit-proto/src/lib.rs +++ b/quickwit/quickwit-proto/src/lib.rs @@ -184,7 +184,7 @@ impl TryFrom for SearchRequest { fn try_from(search_stream_req: SearchStreamRequest) -> Result { Ok(Self { - index_id: search_stream_req.index_id, + index_id_patterns: vec![search_stream_req.index_id], query_ast: search_stream_req.query_ast, snippet_fields: search_stream_req.snippet_fields, start_timestamp: search_stream_req.start_timestamp, @@ -200,7 +200,7 @@ impl TryFrom for SearchRequest { fn try_from(delete_query: DeleteQuery) -> anyhow::Result { let index_uid: IndexUid = delete_query.index_uid.into(); Ok(Self { - index_id: index_uid.index_id().to_string(), + index_id_patterns: vec![index_uid.index_id().to_string()], query_ast: delete_query.query_ast, start_timestamp: delete_query.start_timestamp, end_timestamp: delete_query.end_timestamp, diff --git a/quickwit/quickwit-query/src/query_ast/mod.rs 
b/quickwit/quickwit-query/src/query_ast/mod.rs index 6708dae41f1..b5b44c18d32 100644 --- a/quickwit/quickwit-query/src/query_ast/mod.rs +++ b/quickwit/quickwit-query/src/query_ast/mod.rs @@ -272,15 +272,19 @@ fn parse_user_query_in_asts( /// # Panics /// /// Panics if the user text is invalid. -pub fn qast_helper(user_text: &str, default_fields: &[&'static str]) -> String { +pub fn qast_string_helper(user_text: &str, default_fields: &[&'static str]) -> String { + let ast = qast_helper(user_text, default_fields); + serde_json::to_string(&ast).expect("The query AST should be JSON serializable.") +} + +pub fn qast_helper(user_text: &str, default_fields: &[&'static str]) -> QueryAst { let default_fields: Vec = default_fields .iter() .map(|default_field| default_field.to_string()) .collect(); - let ast: QueryAst = query_ast_from_user_text(user_text, Some(default_fields)) + query_ast_from_user_text(user_text, Some(default_fields)) .parse_user_query(&[]) - .expect("The user query should be valid."); - serde_json::to_string(&ast).expect("The query AST should be JSON serializable.") + .expect("The user query should be valid.") } /// Creates a QueryAST with a single UserInputQuery node. 
diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index 664ebc895e2..e6f28714b13 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -345,7 +345,7 @@ mod tests { PartialHit, SearchRequest, SearchStreamRequest, SortValue, SplitIdAndFooterOffsets, SplitSearchError, }; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use super::*; use crate::root::SearchJob; @@ -364,7 +364,6 @@ mod tests { fn mock_doc_request(split_id: &str) -> FetchDocsRequest { FetchDocsRequest { partial_hits: Vec::new(), - index_id: "id".to_string(), index_uri: "uri".to_string(), split_offsets: vec![SplitIdAndFooterOffsets { split_id: split_id.to_string(), @@ -379,8 +378,8 @@ mod tests { fn mock_leaf_search_request() -> LeafSearchRequest { let search_request = SearchRequest { - index_id: "test-idx".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-idx".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; @@ -410,7 +409,7 @@ mod tests { fn mock_leaf_search_stream_request() -> LeafSearchStreamRequest { let search_request = SearchStreamRequest { index_id: "test-idx".to_string(), - query_ast: qast_helper("text", &["body"]), + query_ast: qast_string_helper("text", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: None, diff --git a/quickwit/quickwit-search/src/error.rs b/quickwit/quickwit-search/src/error.rs index 126e51e49fb..9f92176427b 100644 --- a/quickwit/quickwit-search/src/error.rs +++ b/quickwit/quickwit-search/src/error.rs @@ -26,12 +26,12 @@ use tantivy::TantivyError; use thiserror::Error; use tokio::task::JoinError; -/// Possible SearchError +/// Possible SearchError. 
#[allow(missing_docs)] #[derive(Error, Debug, Serialize, Deserialize, Clone)] pub enum SearchError { - #[error("Index `{index_id}` does not exist.")] - IndexDoesNotExist { index_id: String }, + #[error("Indexes IDs or index ID patterns `{index_id_patterns:?}` do not exist.")] + IndexesDoNotExist { index_id_patterns: Vec }, #[error("Internal error: `{0}`.")] InternalError(String), #[error("Storage not found: `{0}`)")] @@ -47,7 +47,7 @@ pub enum SearchError { impl ServiceError for SearchError { fn status_code(&self) -> ServiceErrorCode { match self { - SearchError::IndexDoesNotExist { .. } => ServiceErrorCode::NotFound, + SearchError::IndexesDoNotExist { .. } => ServiceErrorCode::NotFound, SearchError::InternalError(_) => ServiceErrorCode::Internal, SearchError::StorageResolverError(_) => ServiceErrorCode::BadRequest, SearchError::InvalidQuery(_) => ServiceErrorCode::BadRequest, @@ -102,9 +102,9 @@ impl From for SearchError { impl From for SearchError { fn from(metastore_error: MetastoreError) -> SearchError { match metastore_error { - MetastoreError::IndexDoesNotExist { index_id } => { - SearchError::IndexDoesNotExist { index_id } - } + MetastoreError::IndexesDoNotExist { index_ids } => SearchError::IndexesDoNotExist { + index_id_patterns: index_ids, + }, _ => SearchError::InternalError(format!("{metastore_error}")), } } diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index 6e0055f1346..b802ef56e4a 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -197,7 +197,7 @@ mod tests { }; let query_1 = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test".to_string(), start_timestamp: None, end_timestamp: None, @@ -207,7 +207,7 @@ mod tests { }; let query_2 = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test2".to_string(), 
start_timestamp: None, end_timestamp: None, @@ -265,7 +265,7 @@ mod tests { }; let query_1 = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test".to_string(), start_timestamp: Some(100), end_timestamp: Some(250), @@ -274,7 +274,7 @@ mod tests { ..Default::default() }; let query_1bis = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test".to_string(), start_timestamp: Some(150), end_timestamp: Some(300), @@ -284,7 +284,7 @@ mod tests { }; let query_2 = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test2".to_string(), start_timestamp: None, end_timestamp: None, @@ -293,7 +293,7 @@ mod tests { ..Default::default() }; let query_2bis = SearchRequest { - index_id: "test-idx".to_string(), + index_id_patterns: vec!["test-idx".to_string()], query_ast: "test2".to_string(), start_timestamp: Some(50), end_timestamp: Some(200), diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index 01cd550567f..9993d6febb1 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -49,7 +49,6 @@ pub use collector::QuickwitAggregations; use metrics::SEARCH_METRICS; use quickwit_common::tower::Pool; use quickwit_doc_mapper::DocMapper; -use quickwit_query::query_ast::QueryAst; use tantivy::schema::NamedFieldDocument; /// Refer to this as `crate::Result`. 
@@ -60,9 +59,9 @@ use std::sync::Arc; pub use find_trace_ids_collector::FindTraceIdsCollector; use quickwit_config::SearcherConfig; -use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; +use quickwit_doc_mapper::tag_pruning::TagFilterAst; use quickwit_metastore::{ListSplitsQuery, Metastore, SplitMetadata, SplitState}; -use quickwit_proto::{IndexUid, PartialHit, SearchRequest, SplitIdAndFooterOffsets}; +use quickwit_proto::{IndexUid, PartialHit, SplitIdAndFooterOffsets}; use quickwit_storage::StorageResolver; use tantivy::DocAddress; @@ -73,7 +72,9 @@ pub use crate::cluster_client::ClusterClient; pub use crate::error::{parse_grpc_error, SearchError}; use crate::fetch_docs::fetch_docs; use crate::leaf::{leaf_list_terms, leaf_search}; -pub use crate::root::{jobs_to_leaf_request, root_list_terms, root_search, SearchJob}; +pub use crate::root::{ + jobs_to_leaf_requests, root_list_terms, root_search, IndexMetasForLeafSearch, SearchJob, +}; pub use crate::search_job_placer::{Job, SearchJobPlacer}; pub use crate::search_response_rest::SearchResponseRest; pub use crate::search_stream::root_search_stream; @@ -120,28 +121,24 @@ fn extract_split_and_footer_offsets(split_metadata: &SplitMetadata) -> SplitIdAn /// Extract the list of relevant splits for a given search request. async fn list_relevant_splits( - // TODO: switch search request to index_uid and remove this. 
- index_uid: IndexUid, - search_request: &SearchRequest, + indexes_uids: Vec, + start_timestamp: Option, + end_timestamp: Option, + tags_filter_opt: Option, metastore: &dyn Metastore, ) -> crate::Result> { - let mut query = ListSplitsQuery::for_index(index_uid).with_split_state(SplitState::Published); + let mut query = + ListSplitsQuery::for_indexes(indexes_uids)?.with_split_state(SplitState::Published); - if let Some(start_ts) = search_request.start_timestamp { + if let Some(start_ts) = start_timestamp { query = query.with_time_range_start_gte(start_ts); } - if let Some(end_ts) = search_request.end_timestamp { + if let Some(end_ts) = end_timestamp { query = query.with_time_range_end_lt(end_ts); } - let query_ast: QueryAst = serde_json::from_str(&search_request.query_ast).map_err(|_| { - SearchError::InternalError(format!( - "Failed to deserialize query_ast: `{}`", - search_request.query_ast - )) - })?; - if let Some(tags_filter) = extract_tags_from_query(query_ast) { + if let Some(tags_filter) = tags_filter_opt { query = query.with_tags_filter(tags_filter); } diff --git a/quickwit/quickwit-search/src/retry/search.rs b/quickwit/quickwit-search/src/retry/search.rs index d60714f7265..380f51329ba 100644 --- a/quickwit/quickwit-search/src/retry/search.rs +++ b/quickwit/quickwit-search/src/retry/search.rs @@ -58,7 +58,7 @@ mod tests { LeafSearchRequest, LeafSearchResponse, SearchRequest, SplitIdAndFooterOffsets, SplitSearchError, }; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use crate::retry::search::LeafSearchRetryPolicy; use crate::retry::RetryPolicy; @@ -67,8 +67,8 @@ mod tests { fn mock_leaf_search_request() -> LeafSearchRequest { LeafSearchRequest { search_request: Some(SearchRequest { - index_id: "test-idx".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-idx".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }), diff 
--git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 1db04f8bfdc..94890354d42 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -18,7 +18,6 @@ // along with this program. If not, see . use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use std::time::Duration; use anyhow::Context; @@ -27,16 +26,19 @@ use itertools::Itertools; use quickwit_common::shared_consts::{DELETION_GRACE_PERIOD, SCROLL_BATCH_LEN}; use quickwit_common::uri::Uri; use quickwit_config::{build_doc_mapper, IndexConfig}; +use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; use quickwit_doc_mapper::{DocMapper, DYNAMIC_FIELD_NAME}; -use quickwit_metastore::{Metastore, SplitMetadata}; +use quickwit_metastore::{IndexMetadata, ListIndexesQuery, Metastore, SplitMetadata}; use quickwit_proto::{ - FetchDocsRequest, FetchDocsResponse, Hit, LeafHit, LeafListTermsRequest, LeafListTermsResponse, - LeafSearchRequest, LeafSearchResponse, ListTermsRequest, ListTermsResponse, PartialHit, - SearchRequest, SearchResponse, SnippetRequest, SortField, SplitIdAndFooterOffsets, + FetchDocsRequest, FetchDocsResponse, Hit, IndexUid, LeafHit, LeafListTermsRequest, + LeafListTermsResponse, LeafSearchRequest, LeafSearchResponse, ListTermsRequest, + ListTermsResponse, PartialHit, SearchRequest, SearchResponse, SnippetRequest, SortField, + SplitIdAndFooterOffsets, }; use quickwit_query::query_ast::{ BoolQuery, QueryAst, QueryAstVisitor, RangeQuery, TermQuery, TermSetQuery, }; +use serde::{Deserialize, Serialize}; use tantivy::aggregation::agg_result::AggregationResults; use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; use tantivy::collector::Collector; @@ -61,6 +63,7 @@ const MAX_SCROLL_TTL: Duration = Duration::from_secs(DELETION_GRACE_PERIOD.as_se /// SearchJob to be assigned to search clients by the [`SearchJobPlacer`]. 
#[derive(Debug, Clone, PartialEq)] pub struct SearchJob { + index_uid: IndexUid, cost: usize, offsets: SplitIdAndFooterOffsets, } @@ -69,6 +72,7 @@ impl SearchJob { #[cfg(test)] pub fn for_test(split_id: &str, cost: usize) -> SearchJob { SearchJob { + index_uid: IndexUid::from("test-index".to_string()), cost, offsets: SplitIdAndFooterOffsets { split_id: split_id.to_string(), @@ -87,6 +91,7 @@ impl From for SplitIdAndFooterOffsets { impl<'a> From<&'a SplitMetadata> for SearchJob { fn from(split_metadata: &'a SplitMetadata) -> Self { SearchJob { + index_uid: split_metadata.index_uid.clone(), cost: compute_split_cost(split_metadata), offsets: extract_split_and_footer_offsets(split_metadata), } @@ -103,7 +108,8 @@ impl Job for SearchJob { } } -pub(crate) struct FetchDocsJob { +pub struct FetchDocsJob { + index_uid: IndexUid, offsets: SplitIdAndFooterOffsets, pub partial_hits: Vec, } @@ -124,6 +130,103 @@ impl From for SplitIdAndFooterOffsets { } } +/// Index metas needed for executing a leaf search request. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct IndexMetasForLeafSearch { + /// Index URI. + pub index_uri: Uri, + /// Doc mapper json string. + pub doc_mapper_str: String, +} + +pub(crate) type IndexesMetasForLeafSearch = HashMap; +type TimestampFieldOpt = Option; + +/// Validates request against each index's doc mapper and ensures that: +/// - timestamp fields (if any) are equal across indexes. +/// - resolved query ASTs are the same across indexes. +/// Returns the timestamp field, the resolved query AST and the indexes metadatas +/// needed for leaf search requests. +/// Note: the requirements on timestamp fields and resolved query ASTs can be lifted +/// but it adds complexity that does not seem needed right now. 
+fn validate_request_and_build_metadatas( + indexes_metadatas: &[IndexMetadata], + search_request: &SearchRequest, +) -> crate::Result<(TimestampFieldOpt, QueryAst, IndexesMetasForLeafSearch)> { + let mut metadatas_for_leaf: HashMap = HashMap::new(); + let query_ast: QueryAst = serde_json::from_str(&search_request.query_ast) + .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; + let mut query_ast_resolved_opt: Option = None; + let mut timestamp_field_opt: Option = None; + + for index_metadata in indexes_metadatas.iter() { + let doc_mapper = build_doc_mapper( + &index_metadata.index_config.doc_mapping, + &index_metadata.index_config.search_settings, + ) + .map_err(|err| { + SearchError::InternalError(format!("Failed to build doc mapper. Cause: {err}")) + })?; + let query_ast_resolved_for_index = query_ast + .clone() + .parse_user_query(doc_mapper.default_search_fields()) + // We convert the error to return a 400 to the user (and not a 500). + .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; + + // Validate uniqueness of resolved query AST. + if let Some(query_ast_resolved) = &query_ast_resolved_opt { + if query_ast_resolved != &query_ast_resolved_for_index { + return Err(SearchError::InvalidQuery( + "Resolved query ASTs must be the same across indexes. Resolving queries with \ + different default fields are different between indexes is not supported." + .to_string(), + )); + } + } else { + query_ast_resolved_opt = Some(query_ast_resolved_for_index.clone()); + } + + // Validate uniqueness of timestamp field if any. + if let Some(timestamp_field_for_index) = doc_mapper.timestamp_field_name() { + match timestamp_field_opt { + Some(timestamp_field) if timestamp_field != timestamp_field_for_index => { + return Err(SearchError::InvalidQuery( + "The timestamp field (if present) must be the same for all indexes." 
+ .to_string(), + )); + } + None => { + timestamp_field_opt = Some(timestamp_field_for_index.to_string()); + } + _ => {} + } + } + + validate_request(&*doc_mapper, search_request)?; + // Validates the query by effectively building it against the current schema. + doc_mapper.query(doc_mapper.schema(), &query_ast_resolved_for_index, true)?; + + let index_metadata_for_leaf_search = IndexMetasForLeafSearch { + index_uri: index_metadata.index_uri().clone(), + doc_mapper_str: serde_json::to_string(&doc_mapper).map_err(|err| { + SearchError::InternalError(format!("Failed to serialize doc mapper. Cause: {err}")) + })?, + }; + metadatas_for_leaf.insert( + index_metadata.index_uid.clone(), + index_metadata_for_leaf_search, + ); + } + + let query_ast_resolved = query_ast_resolved_opt.ok_or_else(|| { + SearchError::InternalError( + "Resolved query AST must be present. This should never happen.".to_string(), + ) + })?; + + Ok((timestamp_field_opt, query_ast_resolved, metadatas_for_leaf)) +} + fn validate_requested_snippet_fields( schema: &Schema, snippet_fields: &[String], @@ -173,7 +276,7 @@ fn validate_sort_by_fields(sort_fields: &[SortField], schema: &Schema) -> crate: fn simplify_search_request_for_scroll_api(req: &SearchRequest) -> SearchRequest { // We do not mutate SearchRequest { - index_id: req.index_id.clone(), + index_id_patterns: req.index_id_patterns.clone(), query_ast: req.query_ast.clone(), start_timestamp: req.start_timestamp, end_timestamp: req.end_timestamp, @@ -264,12 +367,11 @@ fn get_scroll_ttl_duration(search_request: &SearchRequest) -> crate::Result crate::Result<(LeafSearchResponse, Option)> { @@ -284,9 +386,8 @@ async fn search_partial_hits_phase_with_scroll( search_request.scroll_ttl_secs = None; let mut leaf_search_resp = search_partial_hits_phase( searcher_context, + indexes_metas_for_leaf_search, &search_request, - index_uri, - doc_mapper_str, split_metadatas, cluster_client, ) @@ -296,8 +397,7 @@ async fn search_partial_hits_phase_with_scroll( let 
scroll_context_search_request = simplify_search_request_for_scroll_api(&search_request); let scroll_ctx = ScrollContext { - index_uri: index_uri.clone(), - doc_mapper_str: doc_mapper_str.to_string(), + indexes_metas_for_leaf_search: indexes_metas_for_leaf_search.clone(), split_metadatas: split_metadatas.to_vec(), search_request: scroll_context_search_request, total_num_hits: leaf_search_resp.num_hits, @@ -321,9 +421,8 @@ async fn search_partial_hits_phase_with_scroll( } else { let leaf_search_resp = search_partial_hits_phase( searcher_context, + indexes_metas_for_leaf_search, &search_request, - index_uri, - doc_mapper_str, split_metadatas, cluster_client, ) @@ -332,32 +431,28 @@ async fn search_partial_hits_phase_with_scroll( } } -#[instrument(skip(search_request, cluster_client))] +#[instrument(skip(search_request, indexes_metas_for_leaf_search, cluster_client))] pub(crate) async fn search_partial_hits_phase( searcher_context: &SearcherContext, + indexes_metas_for_leaf_search: &IndexesMetasForLeafSearch, search_request: &SearchRequest, - index_uri: &Uri, - doc_mapper_str: &str, split_metadatas: &[SplitMetadata], cluster_client: &ClusterClient, ) -> crate::Result { let jobs: Vec = split_metadatas.iter().map(SearchJob::from).collect(); - let assigned_leaf_search_jobs = cluster_client .search_job_placer .assign_jobs(jobs, &HashSet::default()) .await?; - let leaf_search_responses: Vec = - try_join_all(assigned_leaf_search_jobs.map(|(client, client_jobs)| { - let leaf_request = jobs_to_leaf_request( - search_request, - doc_mapper_str, - index_uri.as_ref(), - client_jobs, - ); - cluster_client.leaf_search(leaf_request, client) - })) - .await?; + let mut leaf_request_tasks = Vec::new(); + for (client, client_jobs) in assigned_leaf_search_jobs { + let leaf_requests = + jobs_to_leaf_requests(search_request, indexes_metas_for_leaf_search, client_jobs)?; + for leaf_request in leaf_requests { + leaf_request_tasks.push(cluster_client.leaf_search(leaf_request, client.clone())); 
+ } + } + let leaf_search_responses: Vec = try_join_all(leaf_request_tasks).await?; // Creates a collector which merges responses into one let merge_collector = @@ -405,11 +500,9 @@ pub(crate) fn get_snippet_request(search_request: &SearchRequest) -> Option, cluster_client: &ClusterClient, ) -> crate::Result> { @@ -426,42 +519,28 @@ pub(crate) async fn fetch_docs_phase( }) .collect(); - let client_fetch_docs_task: Vec<(SearchServiceClient, Vec)> = - assign_client_fetch_doc_tasks( - partial_hits, - split_metadatas, - &cluster_client.search_job_placer, - ) - .await?; - - let fetch_docs_resp_futures = - client_fetch_docs_task - .into_iter() - .map(|(client, fetch_docs_jobs)| { - let partial_hits: Vec = fetch_docs_jobs - .iter() - .flat_map(|fetch_doc_job| fetch_doc_job.partial_hits.iter().cloned()) - .collect(); - let split_offsets: Vec = fetch_docs_jobs - .into_iter() - .map(|fetch_doc_job| fetch_doc_job.into()) - .collect(); - - let fetch_docs_req = FetchDocsRequest { - partial_hits, - index_id: index_id.to_string(), - split_offsets, - index_uri: index_uri.to_string(), - snippet_request: snippet_request_opt.clone(), - doc_mapper: doc_mapper_str.to_string(), - }; - cluster_client.fetch_docs(fetch_docs_req, client) - }); + let assigned_fetch_docs_jobs = assign_client_fetch_docs_jobs( + partial_hits, + split_metadatas, + &cluster_client.search_job_placer, + ) + .await?; - let fetch_docs_resps: Vec = try_join_all(fetch_docs_resp_futures).await?; + let mut fetch_docs_tasks = Vec::new(); + for (client, client_jobs) in assigned_fetch_docs_jobs { + let fetch_jobs_requests = jobs_to_fetch_docs_requests( + snippet_request_opt.clone(), + indexes_metas_for_leaf_search, + client_jobs, + )?; + for fetch_docs_request in fetch_jobs_requests { + fetch_docs_tasks.push(cluster_client.fetch_docs(fetch_docs_request, client.clone())); + } + } + let fetch_docs_responses: Vec = try_join_all(fetch_docs_tasks).await?; // Merge the fetched docs. 
- let leaf_hits = fetch_docs_resps + let leaf_hits = fetch_docs_responses .into_iter() .flat_map(|response| response.hits.into_iter()); @@ -499,28 +578,26 @@ pub(crate) async fn fetch_docs_phase( /// 2. Merges the search results. /// 3. Sends fetch docs requests to multiple leaf nodes. /// 4. Builds the response with docs and returns. -#[instrument(skip(search_request, cluster_client))] +#[instrument(skip( + searcher_context, + indexes_metas_for_leaf_search, + search_request, + cluster_client +))] async fn root_search_aux( searcher_context: &SearcherContext, + indexes_metas_for_leaf_search: &IndexesMetasForLeafSearch, search_request: SearchRequest, - index_uri: &Uri, - doc_mapper: Arc, - query_ast_resolved: QueryAst, split_metadatas: Vec, cluster_client: &ClusterClient, ) -> crate::Result { - let doc_mapper_str = serde_json::to_string(&*doc_mapper).map_err(|err| { - SearchError::InternalError(format!("Failed to serialize doc mapper: Cause {err}")) - })?; - let (first_phase_result, scroll_key_and_start_offset_opt): ( LeafSearchResponse, Option, ) = search_partial_hits_phase_with_scroll( searcher_context, + indexes_metas_for_leaf_search, search_request.clone(), - index_uri, - &doc_mapper_str, &split_metadatas[..], cluster_client, ) @@ -528,11 +605,9 @@ async fn root_search_aux( let snippet_request: Option = get_snippet_request(&search_request); let hits = fetch_docs_phase( + indexes_metas_for_leaf_search, &first_phase_result.partial_hits, &split_metadatas[..], - &search_request.index_id, - index_uri, - &doc_mapper_str, snippet_request, cluster_client, ) @@ -611,26 +686,24 @@ pub async fn root_search( cluster_client: &ClusterClient, ) -> crate::Result { let start_instant = tokio::time::Instant::now(); - let index_metadata = metastore.index_metadata(&search_request.index_id).await?; - let index_uid = index_metadata.index_uid.clone(); - let index_config = index_metadata.into_index_config(); - - let doc_mapper = build_doc_mapper(&index_config.doc_mapping, 
&index_config.search_settings) - .map_err(|err| { - SearchError::InternalError(format!("Failed to build doc mapper. Cause: {err}")) - })?; - - validate_request(&*doc_mapper, &search_request)?; - - let query_ast: QueryAst = serde_json::from_str(&search_request.query_ast) - .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; - - let query_ast_resolved = query_ast - .parse_user_query(doc_mapper.default_search_fields()) - // We convert the error to return a 400 to the user (and not a 500). - .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; - - if let Some(timestamp_field) = doc_mapper.timestamp_field_name() { + let indexes_metadatas = metastore + .list_indexes_metadatas(ListIndexesQuery::IndexIdPatterns( + search_request.index_id_patterns.clone(), + )) + .await?; + if indexes_metadatas.is_empty() { + return Err(SearchError::IndexesDoNotExist { + index_id_patterns: search_request.index_id_patterns, + }); + } + let index_uids = indexes_metadatas + .iter() + .map(|index_metadata| index_metadata.index_uid.clone()) + .collect_vec(); + let (timestamp_field_opt, query_ast_resolved, indexes_metas_for_leaf_search) = + validate_request_and_build_metadatas(&indexes_metadatas, &search_request)?; + search_request.query_ast = serde_json::to_string(&query_ast_resolved)?; + if let Some(timestamp_field) = ×tamp_field_opt { refine_start_end_timestamp_from_ast( &query_ast_resolved, timestamp_field, @@ -638,23 +711,21 @@ pub async fn root_search( &mut search_request.end_timestamp, ); } - - // Validates the query by effectively building it against the current schema. 
- doc_mapper.query(doc_mapper.schema(), &query_ast_resolved, true)?; - - search_request.query_ast = serde_json::to_string(&query_ast_resolved).map_err(|err| { - SearchError::InternalError(format!("Failed to serialize query ast: Cause {err}")) - })?; - - let split_metadatas: Vec = - list_relevant_splits(index_uid, &search_request, metastore).await?; + let tag_filter_ast = extract_tags_from_query(query_ast_resolved); + + let split_metadatas: Vec = list_relevant_splits( + index_uids, + search_request.start_timestamp, + search_request.end_timestamp, + tag_filter_ast, + metastore, + ) + .await?; let mut search_response = root_search_aux( searcher_context, + &indexes_metas_for_leaf_search, search_request, - &index_config.index_uri, - doc_mapper.clone(), - query_ast_resolved, split_metadatas, cluster_client, ) @@ -886,7 +957,6 @@ pub async fn root_list_terms( // Merging is a cpu-bound task, but probably fast enough to not require // spawning it on a blocking thread. - let merged_iter = leaf_search_responses .into_iter() .map(|leaf_search_response| leaf_search_response.terms) @@ -910,20 +980,24 @@ pub async fn root_list_terms( }) } -async fn assign_client_fetch_doc_tasks( +async fn assign_client_fetch_docs_jobs( partial_hits: &[PartialHit], split_metadatas: &[SplitMetadata], client_pool: &SearchJobPlacer, -) -> crate::Result)>> { - let split_offsets_map: HashMap = split_metadatas - .iter() - .map(|metadata| { - ( - metadata.split_id().to_string(), - extract_split_and_footer_offsets(metadata), - ) - }) - .collect(); +) -> crate::Result)>> { + let index_uids_and_split_offsets_map: HashMap = + split_metadatas + .iter() + .map(|metadata| { + ( + metadata.split_id().to_string(), + ( + metadata.index_uid.clone(), + extract_split_and_footer_offsets(metadata), + ), + ) + }) + .collect(); // Group the partial hits per split let mut partial_hits_map: HashMap> = HashMap::new(); @@ -936,25 +1010,26 @@ async fn assign_client_fetch_doc_tasks( let mut fetch_docs_req_jobs: Vec = 
Vec::new(); for (split_id, partial_hits) in partial_hits_map { - let offsets = split_offsets_map + let (index_uid, offsets) = index_uids_and_split_offsets_map .get(&split_id) .ok_or_else(|| { crate::SearchError::InternalError(format!( - "Received partial hit from an Unknown split {split_id}" + "Received partial hit from an unknown split {split_id}" )) })? .clone(); let fetch_docs_job = FetchDocsJob { + index_uid: index_uid.clone(), offsets, partial_hits, }; fetch_docs_req_jobs.push(fetch_docs_job); } - let assigned_jobs: Vec<(SearchServiceClient, Vec)> = client_pool + let assigned_jobs = client_pool .assign_jobs(fetch_docs_req_jobs, &HashSet::new()) - .await? - .collect(); + .await?; + Ok(assigned_jobs) } @@ -964,35 +1039,83 @@ fn compute_split_cost(_split_metadata: &SplitMetadata) -> usize { 1 } -/// Builds a [`LeafSearchRequest`] from a list of [`SearchJob`]. -pub fn jobs_to_leaf_request( +/// Builds a list of [`LeafSearchRequest`], one per index, from a list of [`SearchJob`]. +pub fn jobs_to_leaf_requests( request: &SearchRequest, - doc_mapper_str: &str, - index_uri: &str, // TODO make Uri + search_indexes_metadatas: &IndexesMetasForLeafSearch, jobs: Vec, -) -> LeafSearchRequest { - let mut request_with_offset_0 = request.clone(); - request_with_offset_0.start_offset = 0; - request_with_offset_0.max_hits += request.start_offset; - LeafSearchRequest { - search_request: Some(request_with_offset_0), - split_offsets: jobs.into_iter().map(|job| job.offsets).collect(), - doc_mapper: doc_mapper_str.to_string(), - index_uri: index_uri.to_string(), +) -> crate::Result> { + let mut search_request_for_leaf = request.clone(); + search_request_for_leaf.start_offset = 0; + search_request_for_leaf.max_hits += request.start_offset; + let mut leaf_search_requests = Vec::new(); + // Group jobs by index uid. 
+ for (index_uid, job_group) in &jobs.into_iter().group_by(|job| job.index_uid.clone()) { + let search_index_meta = search_indexes_metadatas.get(&index_uid).ok_or_else(|| { + SearchError::InternalError(format!( + "Received search job for an unknown index {index_uid}. It should never happen." + )) + })?; + let leaf_search_request = LeafSearchRequest { + search_request: Some(search_request_for_leaf.clone()), + split_offsets: job_group.into_iter().map(|job| job.offsets).collect(), + doc_mapper: search_index_meta.doc_mapper_str.clone(), + index_uri: search_index_meta.index_uri.to_string(), + }; + leaf_search_requests.push(leaf_search_request); + } + Ok(leaf_search_requests) +} + +/// Builds a list of [`FetchDocsRequest`], one per index, from a list of [`FetchDocsJob`]. +pub fn jobs_to_fetch_docs_requests( + snippet_request_opt: Option, + indexes_metas_for_leaf_search: &IndexesMetasForLeafSearch, + jobs: Vec, +) -> crate::Result> { + let mut fetch_docs_requests = Vec::new(); + // Group jobs by index uid. 
+ for (index_uid, job_group) in &jobs.into_iter().group_by(|job| job.index_uid.clone()) { + let index_meta = indexes_metas_for_leaf_search + .get(&index_uid) + .ok_or_else(|| { + SearchError::InternalError(format!( + "Received search job for an unknown index {index_uid}" + )) + })?; + let fetch_docs_jobs: Vec = job_group.collect(); + let partial_hits: Vec = fetch_docs_jobs + .iter() + .flat_map(|fetch_doc_job| fetch_doc_job.partial_hits.iter().cloned()) + .collect(); + let split_offsets: Vec = fetch_docs_jobs + .into_iter() + .map(|fetch_doc_job| fetch_doc_job.into()) + .collect(); + let fetch_docs_req = FetchDocsRequest { + partial_hits, + split_offsets, + index_uri: index_meta.index_uri.to_string(), + snippet_request: snippet_request_opt.clone(), + doc_mapper: index_meta.doc_mapper_str.clone(), + }; + fetch_docs_requests.push(fetch_docs_req); } + Ok(fetch_docs_requests) } #[cfg(test)] mod tests { use std::ops::Range; + use std::str::FromStr; use std::sync::{Arc, RwLock}; use quickwit_common::shared_consts::SCROLL_BATCH_LEN; - use quickwit_config::SearcherConfig; - use quickwit_indexing::mock_split; + use quickwit_config::{DocMapping, IndexingSettings, SearchSettings, SearcherConfig}; + use quickwit_indexing::MockSplitBuilder; use quickwit_metastore::{IndexMetadata, MockMetastore}; use quickwit_proto::{ScrollRequest, SortOrder, SortValue, SplitSearchError}; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::{qast_helper, qast_string_helper, query_ast_from_user_text}; use tantivy::schema::{FAST, STORED, TEXT}; use super::*; @@ -1031,6 +1154,112 @@ mod tests { ); } + #[test] + fn test_validate_request_and_build_metadatas_ok() { + let request_query_ast = qast_helper("body:test", &[]); + let search_request = quickwit_proto::SearchRequest { + index_id_patterns: vec!["test-index".to_string()], + query_ast: serde_json::to_string(&request_query_ast).unwrap(), + max_hits: 10, + start_offset: 10, + ..Default::default() + }; + let index_metadata = 
IndexMetadata::for_test("test-index-1", "ram:///test-index-1"); + let index_metadata_with_other_config = + index_metadata_for_multi_indexes_test("test-index-2", "ram:///test-index-2"); + let mut index_metadata_no_timestamp = + IndexMetadata::for_test("test-index-3", "ram:///test-index-3"); + index_metadata_no_timestamp + .index_config + .doc_mapping + .timestamp_field = None; + let (timestamp_field, query_ast, indexes_metas_for_leaf_req) = + validate_request_and_build_metadatas( + &[ + index_metadata, + index_metadata_with_other_config, + index_metadata_no_timestamp, + ], + &search_request, + ) + .unwrap(); + assert_eq!(timestamp_field, Some("timestamp".to_string())); + assert_eq!(query_ast, request_query_ast); + assert_eq!(indexes_metas_for_leaf_req.len(), 3); + } + + #[test] + fn test_validate_request_and_build_metadatas_fail_with_different_timestamps() { + let search_request = quickwit_proto::SearchRequest { + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), + max_hits: 10, + start_offset: 10, + ..Default::default() + }; + let index_metadata_1 = IndexMetadata::for_test("test-index-1", "ram:///test-index-1"); + let mut index_metadata_2 = IndexMetadata::for_test("test-index-2", "ram:///test-index-2"); + let doc_mapping_json_2 = r#"{ + "mode": "lenient", + "field_mappings": [ + { + "name": "timestamp-2", + "type": "datetime", + "fast": true + }, + { + "name": "body", + "type": "text" + } + ], + "timestamp_field": "timestamp-2", + "store_source": true + }"#; + let doc_mapping_2: DocMapping = serde_json::from_str(doc_mapping_json_2).unwrap(); + index_metadata_2.index_config.doc_mapping = doc_mapping_2; + index_metadata_2 + .index_config + .search_settings + .default_search_fields = Vec::new(); + let timestamp_field_different = validate_request_and_build_metadatas( + &[index_metadata_1, index_metadata_2], + &search_request, + ) + .unwrap_err(); + assert_eq!( + timestamp_field_different.to_string(), + "The timestamp 
field (if present) must be the same for all indexes." + ); + } + + #[test] + fn test_validate_request_and_build_metadatas_fail_with_different_resolved_qast() { + let qast = query_ast_from_user_text("test", None); + let search_request = quickwit_proto::SearchRequest { + index_id_patterns: vec!["test-index".to_string()], + query_ast: serde_json::to_string(&qast).unwrap(), + max_hits: 10, + start_offset: 10, + ..Default::default() + }; + let index_metadata_1 = IndexMetadata::for_test("test-index-1", "ram:///test-index-1"); + let mut index_metadata_2 = IndexMetadata::for_test("test-index-2", "ram:///test-index-2"); + index_metadata_2 + .index_config + .search_settings + .default_search_fields = vec!["owner".to_string()]; + let timestamp_field_different = validate_request_and_build_metadatas( + &[index_metadata_1, index_metadata_2], + &search_request, + ) + .unwrap_err(); + assert_eq!( + timestamp_field_different.to_string(), + "Resolved query ASTs must be the same across indexes. Resolving queries with \ + different default fields are different between indexes is not supported." 
+ ); + } + fn mock_partial_hit( split_id: &str, sort_value: u64, @@ -1067,24 +1296,28 @@ mod tests { #[tokio::test] async fn test_root_search_offset_out_of_bounds_1085() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, start_offset: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query: ListIndexesQuery| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_2 = MockSearchService::new(); mock_search_service_2.expect_leaf_search().returning( |_leaf_search_req: quickwit_proto::LeafSearchRequest| { @@ -1153,23 +1386,22 @@ mod tests { #[tokio::test] async fn test_root_search_single_split() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let 
index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let mut mock_search_service = MockSearchService::new(); mock_search_service.expect_leaf_search().returning( |_leaf_search_req: quickwit_proto::LeafSearchRequest| { @@ -1213,23 +1445,27 @@ mod tests { #[tokio::test] async fn test_root_search_multiple_splits() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_1 = MockSearchService::new(); 
mock_search_service_1.expect_leaf_search().returning( |_leaf_search_req: quickwit_proto::LeafSearchRequest| { @@ -1294,8 +1530,8 @@ mod tests { async fn test_root_search_multiple_splits_sort_heteregeneous_field_ascending( ) -> anyhow::Result<()> { let mut search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; @@ -1303,17 +1539,21 @@ mod tests { sort_field.set_sort_order(SortOrder::Asc); } let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_1 = MockSearchService::new(); mock_search_service_1.expect_leaf_search().returning( |_leaf_search_req: quickwit_proto::LeafSearchRequest| { @@ -1462,23 +1702,27 @@ mod tests { async fn test_root_search_multiple_splits_sort_heteregeneous_field_descending( ) -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut 
metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_1 = MockSearchService::new(); mock_search_service_1.expect_leaf_search().returning( |_leaf_search_req: quickwit_proto::LeafSearchRequest| { @@ -1626,23 +1870,27 @@ mod tests { #[tokio::test] async fn test_root_search_multiple_splits_retry_on_other_node() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + 
MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_1 = MockSearchService::new(); mock_search_service_1 @@ -1732,23 +1980,27 @@ mod tests { #[tokio::test] async fn test_root_search_multiple_splits_retry_on_all_nodes() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service_1 = MockSearchService::new(); mock_search_service_1 .expect_leaf_search() @@ -1852,23 +2104,22 @@ mod tests { #[tokio::test] async fn test_root_search_single_split_retry_single_node() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = 
MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let mut first_call = true; let mut mock_search_service = MockSearchService::new(); mock_search_service.expect_leaf_search().times(2).returning( @@ -1924,23 +2175,22 @@ mod tests { #[tokio::test] async fn test_root_search_single_split_retry_single_node_fails() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let mut mock_search_service = MockSearchService::new(); 
mock_search_service.expect_leaf_search().times(2).returning( @@ -1981,23 +2231,22 @@ mod tests { async fn test_root_search_one_splits_two_nodes_but_one_is_failing_for_split( ) -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); // Service1 - broken node. 
let mut mock_search_service_1 = MockSearchService::new(); mock_search_service_1.expect_leaf_search().returning( @@ -2064,23 +2313,22 @@ mod tests { async fn test_root_search_one_splits_two_nodes_but_one_is_failing_completely( ) -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); // Service1 - working node. 
let mut mock_search_service_1 = MockSearchService::new(); @@ -2136,17 +2384,16 @@ mod tests { #[tokio::test] async fn test_root_search_invalid_queries() -> anyhow::Result<()> { let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split") + .with_index_uid(&index_uid) + .build()]) + }); let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", MockSearchService::new())]); let search_job_placer = SearchJobPlacer::new(searcher_pool); @@ -2155,8 +2402,8 @@ mod tests { assert!(root_search( &SearcherContext::new(SearcherConfig::default()), quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("invalid_field:\"test\"", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("invalid_field:\"test\"", &["body"]), max_hits: 10, ..Default::default() }, @@ -2169,8 +2416,8 @@ mod tests { assert!(root_search( &SearcherContext::new(SearcherConfig::default()), quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["invalid_field"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["invalid_field"]), max_hits: 10, ..Default::default() }, @@ -2205,24 +2452,23 @@ mod tests { }"#; let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: 
vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, aggregation_request: Some(agg_req.to_string()), ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", MockSearchService::new())]); let search_job_placer = SearchJobPlacer::new(searcher_pool); let cluster_client = ClusterClient::new(search_job_placer.clone()); @@ -2245,24 +2491,23 @@ mod tests { #[tokio::test] async fn test_root_search_invalid_request() -> anyhow::Result<()> { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 10, start_offset: 20_000, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| 
Ok(vec![index_metadata.clone()])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", MockSearchService::new())]); let search_job_placer = SearchJobPlacer::new(searcher_pool); let cluster_client = ClusterClient::new(search_job_placer.clone()); @@ -2280,8 +2525,8 @@ mod tests { ); let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: 20_000, ..Default::default() }; @@ -2414,37 +2659,58 @@ mod tests { assert_eq!(timestamp_range_extractor.end_timestamp, Some(1620283880)); } - fn create_search_resp(hit_range: Range) -> LeafSearchResponse { - let truncate_range = hit_range.start.min(TOTAL_NUM_HITS)..hit_range.end.min(TOTAL_NUM_HITS); + fn create_search_resp(index_uri: &str, hit_range: Range) -> LeafSearchResponse { + let (num_total_hits, split_id) = match index_uri { + "ram:///test-index-1" => (TOTAL_NUM_HITS_INDEX_1, "split1"), + "ram:///test-index-2" => (TOTAL_NUM_HITS_INDEX_2, "split2"), + _ => panic!("unexpected index uri"), + }; + let truncate_range = hit_range.start.min(num_total_hits)..hit_range.end.min(num_total_hits); quickwit_proto::LeafSearchResponse { - num_hits: TOTAL_NUM_HITS as u64, + num_hits: num_total_hits as u64, partial_hits: truncate_range - .map(|doc_id| mock_partial_hit("split1", u64::MAX - doc_id as u64, doc_id as u32)) + .map(|doc_id| { + let sort_value = match index_uri { + "ram:///test-index-1" => u64::MAX - doc_id as u64, + "ram:///test-index-2" => (TOTAL_NUM_HITS_INDEX_2 - doc_id) as u64, + _ => panic!("unexpected index uri"), + }; + mock_partial_hit(split_id, sort_value, doc_id as u32) + }) .collect(), num_attempted_splits: 1, ..Default::default() } } - const TOTAL_NUM_HITS: usize = 
2_005; + const TOTAL_NUM_HITS_INDEX_1: usize = 2_005; + const TOTAL_NUM_HITS_INDEX_2: usize = 10; const MAX_HITS_PER_PAGE: usize = 93; #[tokio::test] async fn test_root_search_with_scroll() { let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index-1", "ram:///test-index-1"); + let index_uid = index_metadata.index_uid.clone(); + let index_metadata_2 = IndexMetadata::for_test("test-index-2", "ram:///test-index-2"); + let index_uid_2 = index_metadata_2.index_uid.clone(); metastore - .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) + .expect_list_indexes_metadatas() + .returning(move |_index_ids_query| { + Ok(vec![index_metadata.clone(), index_metadata_2.clone()]) }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid_2) + .build(), + ]) + }); let mut mock_search_service = MockSearchService::new(); - mock_search_service.expect_leaf_search().once().returning( + mock_search_service.expect_leaf_search().times(2).returning( |req: quickwit_proto::LeafSearchRequest| { let search_req: &SearchRequest = req.search_request.as_ref().unwrap(); // the leaf request does not need to know about the scroll_ttl. 
@@ -2452,12 +2718,13 @@ mod tests { assert!(search_req.scroll_ttl_secs.is_none()); assert_eq!(search_req.max_hits as usize, SCROLL_BATCH_LEN); Ok(create_search_resp( + &req.index_uri, search_req.start_offset as usize ..(search_req.start_offset + search_req.max_hits) as usize, )) }, ); - mock_search_service.expect_leaf_search().once().returning( + mock_search_service.expect_leaf_search().times(2).returning( |req: quickwit_proto::LeafSearchRequest| { let search_req: &SearchRequest = req.search_request.as_ref().unwrap(); // the leaf request does not need to know about the scroll_ttl. @@ -2465,12 +2732,13 @@ mod tests { assert!(search_req.scroll_ttl_secs.is_none()); assert_eq!(search_req.max_hits as usize, 2 * SCROLL_BATCH_LEN); Ok(create_search_resp( + &req.index_uri, search_req.start_offset as usize ..(search_req.start_offset + search_req.max_hits) as usize, )) }, ); - mock_search_service.expect_leaf_search().once().returning( + mock_search_service.expect_leaf_search().times(2).returning( |req: quickwit_proto::LeafSearchRequest| { let search_req: &SearchRequest = req.search_request.as_ref().unwrap(); // the leaf request does not need to know about the scroll_ttl. 
@@ -2478,6 +2746,7 @@ mod tests { assert!(search_req.scroll_ttl_secs.is_none()); assert_eq!(search_req.max_hits as usize, 3 * SCROLL_BATCH_LEN); Ok(create_search_resp( + &req.index_uri, search_req.start_offset as usize ..(search_req.start_offset + search_req.max_hits) as usize, )) @@ -2513,8 +2782,8 @@ mod tests { let mut scroll_id: String = { let search_request = quickwit_proto::SearchRequest { - index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + index_id_patterns: vec!["test-index".to_string()], + query_ast: qast_string_helper("test", &["body"]), max_hits: MAX_HITS_PER_PAGE as u64, scroll_ttl_secs: Some(60), ..Default::default() @@ -2527,7 +2796,10 @@ mod tests { ) .await .unwrap(); - assert_eq!(search_response.num_hits, TOTAL_NUM_HITS as u64); + assert_eq!( + search_response.num_hits, + (TOTAL_NUM_HITS_INDEX_1 + TOTAL_NUM_HITS_INDEX_2) as u64 + ); assert_eq!(search_response.hits.len(), MAX_HITS_PER_PAGE); for (i, hit) in search_response.hits.iter().enumerate() { assert_eq!( @@ -2547,13 +2819,29 @@ mod tests { crate::service::scroll(scroll_req, &cluster_client, &searcher_context) .await .unwrap(); - assert_eq!(scroll_resp.num_hits, TOTAL_NUM_HITS as u64); + assert_eq!( + scroll_resp.num_hits, + (TOTAL_NUM_HITS_INDEX_1 + TOTAL_NUM_HITS_INDEX_2) as u64 + ); for (i, hit) in scroll_resp.hits.iter().enumerate() { let doc = (page * MAX_HITS_PER_PAGE as u64) + i as u64; - assert_eq!( - hit.partial_hit.as_ref().unwrap(), - &mock_partial_hit("split1", u64::MAX - doc, doc as u32) - ); + if doc < TOTAL_NUM_HITS_INDEX_1 as u64 { + assert_eq!( + hit.partial_hit.as_ref().unwrap(), + &mock_partial_hit("split1", u64::MAX - doc, doc as u32) + ); + } else { + // Docs from index 2 come after the ones from index 1. 
+ let doc = doc - TOTAL_NUM_HITS_INDEX_1 as u64; + assert_eq!( + hit.partial_hit.as_ref().unwrap(), + &mock_partial_hit( + "split2", + TOTAL_NUM_HITS_INDEX_2 as u64 - doc, + doc as u32 + ) + ); + } } scroll_id = scroll_resp.scroll_id.unwrap(); count_seen_hits += scroll_resp.hits.len(); @@ -2562,6 +2850,150 @@ mod tests { } } - assert_eq!(count_seen_hits, TOTAL_NUM_HITS); + assert_eq!( + count_seen_hits, + TOTAL_NUM_HITS_INDEX_1 + TOTAL_NUM_HITS_INDEX_2 + ); + } + + fn index_metadata_for_multi_indexes_test(index_id: &str, index_uri: &str) -> IndexMetadata { + let index_uri = Uri::from_str(index_uri).unwrap(); + let doc_mapping_json = r#"{ + "mode": "lenient", + "field_mappings": [ + { + "name": "timestamp", + "type": "datetime", + "fast": true + }, + { + "name": "body", + "type": "text", + "stored": true + } + ], + "timestamp_field": "timestamp", + "store_source": true + }"#; + let doc_mapping = serde_json::from_str(doc_mapping_json).unwrap(); + let indexing_settings = IndexingSettings::default(); + let search_settings = SearchSettings { + default_search_fields: vec!["body".to_string()], + }; + IndexMetadata::new(IndexConfig { + index_id: index_id.to_string(), + index_uri, + doc_mapping, + indexing_settings, + search_settings, + retention_policy: Default::default(), + }) + } + + #[tokio::test] + async fn test_root_search_multi_indices() -> anyhow::Result<()> { + let search_request = quickwit_proto::SearchRequest { + index_id_patterns: vec!["test-index-*".to_string()], + query_ast: qast_string_helper("test", &["body"]), + max_hits: 10, + ..Default::default() + }; + let mut metastore = MockMetastore::new(); + let index_metadata_1 = IndexMetadata::for_test("test-index-1", "ram:///test-index-1"); + let index_uid_1 = index_metadata_1.index_uid.clone(); + let index_metadata_2 = + index_metadata_for_multi_indexes_test("test-index-2", "ram:///test-index-2"); + let index_uid_2 = index_metadata_2.index_uid.clone(); + let index_metadata_3 = + 
index_metadata_for_multi_indexes_test("test-index-3", "ram:///test-index-3"); + let index_uid_3 = index_metadata_3.index_uid.clone(); + metastore.expect_list_indexes_metadatas().return_once( + move |index_ids_query: ListIndexesQuery| { + match index_ids_query { + ListIndexesQuery::IndexIdPatterns(index_ids_query) => { + assert_eq!(index_ids_query, vec!["test-index-*".to_string()]); + } + ListIndexesQuery::All => { + panic!("Unexpected empty index_ids_query"); + } + } + Ok(vec![index_metadata_1, index_metadata_2, index_metadata_3]) + }, + ); + metastore + .expect_list_splits() + .return_once(move |list_splits_query| { + assert!( + list_splits_query.index_uids + == vec![ + index_uid_1.clone(), + index_uid_2.clone(), + index_uid_3.clone() + ] + ); + Ok(vec![ + MockSplitBuilder::new("index-1-split-1") + .with_index_uid(&index_uid_1) + .build(), + MockSplitBuilder::new("index-1-split-2") + .with_index_uid(&index_uid_1) + .build(), + MockSplitBuilder::new("index-2-split-1") + .with_index_uid(&index_uid_2) + .build(), + ]) + }); + let mut mock_search_service_1 = MockSearchService::new(); + mock_search_service_1 + .expect_leaf_search() + .times(2) + .withf(|leaf_search_req| { + (leaf_search_req.index_uri == "ram:///test-index-1" + && leaf_search_req.split_offsets.len() == 2) + || (leaf_search_req.index_uri == "ram:///test-index-2" + && leaf_search_req.split_offsets[0].split_id == "index-2-split-1") + }) + .returning(|leaf_search_req: quickwit_proto::LeafSearchRequest| { + let partial_hits = leaf_search_req + .split_offsets + .iter() + .map(|split_offset| mock_partial_hit(&split_offset.split_id, 3, 1)) + .collect_vec(); + Ok(quickwit_proto::LeafSearchResponse { + num_hits: leaf_search_req.split_offsets.len() as u64, + partial_hits, + failed_splits: Vec::new(), + num_attempted_splits: 1, + ..Default::default() + }) + }); + mock_search_service_1 + .expect_fetch_docs() + .times(2) + .withf(|fetch_docs_req: &FetchDocsRequest| { + (fetch_docs_req.index_uri == 
"ram:///test-index-1" + && fetch_docs_req.partial_hits.len() == 2) + || (fetch_docs_req.index_uri == "ram:///test-index-2" + && fetch_docs_req.partial_hits[0].split_id == "index-2-split-1") + }) + .returning(|fetch_docs_req| { + Ok(quickwit_proto::FetchDocsResponse { + hits: get_doc_for_fetch_req(fetch_docs_req), + }) + }); + let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", mock_search_service_1)]); + let search_job_placer = SearchJobPlacer::new(searcher_pool); + let cluster_client = ClusterClient::new(search_job_placer.clone()); + let search_response = root_search( + &SearcherContext::new(SearcherConfig::default()), + search_request, + &metastore, + &cluster_client, + ) + .await + .unwrap(); + assert_eq!(search_response.num_hits, 3); + assert_eq!(search_response.hits.len(), 3); + Ok(()) } } diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index a846e42935a..0d66a7de36b 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -17,6 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
+use std::collections::HashMap; use std::ops::Range; use std::str::FromStr; use std::sync::Arc; @@ -25,14 +26,14 @@ use std::time::Duration; use anyhow::Context; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use quickwit_common::uri::Uri; use quickwit_metastore::SplitMetadata; -use quickwit_proto::{LeafSearchResponse, PartialHit, SearchRequest}; +use quickwit_proto::{IndexUid, LeafSearchResponse, PartialHit, SearchRequest}; use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; use ttl_cache::TtlCache; use ulid::Ulid; +use crate::root::IndexMetasForLeafSearch; use crate::service::SearcherContext; use crate::ClusterClient; @@ -49,8 +50,7 @@ const SCROLL_BATCH_LEN: usize = 1_000; pub(crate) struct ScrollContext { pub split_metadatas: Vec, pub search_request: SearchRequest, - pub index_uri: Uri, - pub doc_mapper_str: String, + pub indexes_metas_for_leaf_search: HashMap, pub total_num_hits: u64, pub max_hits_per_page: u64, pub cached_partial_hits_start_offset: u64, @@ -96,7 +96,7 @@ impl ScrollContext { } /// Loads in the `ScrollContext` cache all the - /// hits in range [start_offset..start_offset + SCROLL_BATCH_LEN) + /// hits in range [start_offset..start_offset + SCROLL_BATCH_LEN). 
pub async fn load_batch_starting_at( &mut self, start_offset: u64, @@ -110,9 +110,8 @@ impl ScrollContext { self.search_request.start_offset = start_offset; let leaf_search_response: LeafSearchResponse = crate::root::search_partial_hits_phase( searcher_context, + &self.indexes_metas_for_leaf_search, &self.search_request, - &self.index_uri, - &self.doc_mapper_str, &self.split_metadatas[..], cluster_client, ) diff --git a/quickwit/quickwit-search/src/search_stream/leaf.rs b/quickwit/quickwit-search/src/search_stream/leaf.rs index 6ef89827731..bb2dd2a353c 100644 --- a/quickwit/quickwit-search/src/search_stream/leaf.rs +++ b/quickwit/quickwit-search/src/search_stream/leaf.rs @@ -460,7 +460,7 @@ mod tests { use itertools::Itertools; use quickwit_config::SearcherConfig; use quickwit_indexing::TestSandbox; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use serde_json::json; use tantivy::time::{Duration, OffsetDateTime}; @@ -497,7 +497,7 @@ mod tests { let request = SearchStreamRequest { index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + query_ast: qast_string_helper("info", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: Some(end_timestamp), @@ -573,7 +573,7 @@ mod tests { .unix_timestamp(); let request = SearchStreamRequest { index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + query_ast: qast_string_helper("info", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: Some(end_timestamp), @@ -628,7 +628,7 @@ mod tests { let request = SearchStreamRequest { index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + query_ast: qast_string_helper("info", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: None, @@ -716,7 +716,7 @@ mod tests { let request = SearchStreamRequest { index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + query_ast: 
qast_string_helper("info", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: Some(end_timestamp), diff --git a/quickwit/quickwit-search/src/search_stream/root.rs b/quickwit/quickwit-search/src/search_stream/root.rs index 64ebef9ad0f..76d7d3d3fce 100644 --- a/quickwit/quickwit-search/src/search_stream/root.rs +++ b/quickwit/quickwit-search/src/search_stream/root.rs @@ -23,6 +23,7 @@ use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; use quickwit_common::uri::Uri; use quickwit_config::build_doc_mapper; +use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; use quickwit_metastore::Metastore; use quickwit_proto::{LeafSearchStreamRequest, SearchRequest, SearchStreamRequest}; use quickwit_query::query_ast::QueryAst; @@ -57,6 +58,7 @@ pub async fn root_search_stream( let query_ast: QueryAst = serde_json::from_str(&search_stream_request.query_ast) .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; let query_ast_resolved = query_ast.parse_user_query(doc_mapper.default_search_fields())?; + let tags_filter_ast = extract_tags_from_query(query_ast_resolved.clone()); if let Some(timestamp_field) = doc_mapper.timestamp_field_name() { refine_start_end_timestamp_from_ast( @@ -72,7 +74,14 @@ pub async fn root_search_stream( search_stream_request.query_ast = serde_json::to_string(&query_ast_resolved)?; let search_request = SearchRequest::try_from(search_stream_request.clone())?; - let split_metadatas = list_relevant_splits(index_uid, &search_request, metastore).await?; + let split_metadatas = list_relevant_splits( + vec![index_uid], + search_request.start_timestamp, + search_request.end_timestamp, + tags_filter_ast, + metastore, + ) + .await?; let doc_mapper_str = serde_json::to_string(&doc_mapper).map_err(|err| { SearchError::InternalError(format!("Failed to serialize doc mapper: Cause {err}")) @@ -120,10 +129,10 @@ fn jobs_to_leaf_request( #[cfg(test)] mod tests { - use quickwit_indexing::mock_split; + use 
quickwit_indexing::MockSplitBuilder; use quickwit_metastore::{IndexMetadata, MockMetastore}; use quickwit_proto::OutputFormat; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use tokio_stream::wrappers::UnboundedReceiverStream; use super::*; @@ -133,23 +142,22 @@ mod tests { async fn test_root_search_stream_single_split() -> anyhow::Result<()> { let request = quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + query_ast: qast_string_helper("test", &["body"]), fast_field: "timestamp".to_string(), output_format: OutputFormat::Csv as i32, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .returning(move |_index_id: &str| Ok(index_metadata.clone())); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let mut mock_search_service = MockSearchService::new(); let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); result_sender.send(Ok(quickwit_proto::LeafSearchStreamResponse { @@ -185,24 +193,23 @@ mod tests { async fn test_root_search_stream_single_split_partitionned() -> anyhow::Result<()> { let request = quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + query_ast: qast_string_helper("test", &["body"]), fast_field: "timestamp".to_string(), output_format: OutputFormat::Csv as i32, partition_by_field: Some("timestamp".to_string()), ..Default::default() }; let mut 
metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1")])); + .returning(move |_index_id: &str| Ok(index_metadata.clone())); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build()]) + }); let mut mock_search_service = MockSearchService::new(); let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); result_sender.send(Ok(quickwit_proto::LeafSearchStreamResponse { @@ -236,23 +243,27 @@ mod tests { async fn test_root_search_stream_single_split_with_error() -> anyhow::Result<()> { let request = quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + query_ast: qast_string_helper("test", &["body"]), fast_field: "timestamp".to_string(), output_format: OutputFormat::Csv as i32, ..Default::default() }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split1"), mock_split("split2")])); + .returning(move |_index_id: &str| Ok(index_metadata.clone())); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut 
mock_search_service = MockSearchService::new(); let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); result_sender.send(Ok(quickwit_proto::LeafSearchStreamResponse { @@ -292,17 +303,16 @@ mod tests { #[tokio::test] async fn test_root_search_stream_with_invalid_query() -> anyhow::Result<()> { let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split")])); + .returning(move |_index_id: &str| Ok(index_metadata.clone())); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![MockSplitBuilder::new("split") + .with_index_uid(&index_uid) + .build()]) + }); let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", MockSearchService::new())]); let search_job_placer = SearchJobPlacer::new(searcher_pool); @@ -310,7 +320,7 @@ mod tests { assert!(root_search_stream( quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper(r#"invalid_field:"test""#, &[]), + query_ast: qast_string_helper(r#"invalid_field:"test""#, &[]), fast_field: "timestamp".to_string(), output_format: OutputFormat::Csv as i32, partition_by_field: Some("timestamp".to_string()), @@ -325,7 +335,7 @@ mod tests { assert!(root_search_stream( quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["invalid_field"]), + query_ast: qast_string_helper("test", &["invalid_field"]), fast_field: "timestamp".to_string(), output_format: OutputFormat::Csv as i32, partition_by_field: Some("timestamp".to_string()), diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 
01d18374aaa..bf13dc00991 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -178,7 +178,7 @@ impl SearchService for SearchServiceImpl { let search_request = leaf_search_request .search_request .ok_or_else(|| SearchError::InternalError("No search request.".to_string()))?; - info!(index=?search_request.index_id, splits=?leaf_search_request.split_offsets, "leaf_search"); + info!(index=?search_request.index_id_patterns, splits=?leaf_search_request.split_offsets, "leaf_search"); let storage = self .storage_resolver .resolve(&Uri::from_well_formed(leaf_search_request.index_uri)) @@ -370,11 +370,9 @@ pub(crate) async fn scroll( // Fetch the actual documents. let hits: Vec = fetch_docs_phase( + &scroll_context.indexes_metas_for_leaf_search, &partial_hits[..], &scroll_context.split_metadatas[..], - &scroll_context.search_request.index_id, - &scroll_context.index_uri, - &scroll_context.doc_mapper_str, snippet_request, cluster_client, ) diff --git a/quickwit/quickwit-search/src/tests.rs b/quickwit/quickwit-search/src/tests.rs index 3018b433108..38a4eaccd70 100644 --- a/quickwit/quickwit-search/src/tests.rs +++ b/quickwit/quickwit-search/src/tests.rs @@ -22,6 +22,7 @@ use std::net::Ipv4Addr; use assert_json_diff::{assert_json_eq, assert_json_include}; use quickwit_config::SearcherConfig; +use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; use quickwit_doc_mapper::DefaultDocMapper; use quickwit_indexing::TestSandbox; use quickwit_opentelemetry::otlp::TraceId; @@ -29,7 +30,9 @@ use quickwit_proto::{ LeafListTermsResponse, SearchRequest, SearchResponse, SortByValue, SortField, SortOrder, SortValue, }; -use quickwit_query::query_ast::{qast_helper, query_ast_from_user_text}; +use quickwit_query::query_ast::{ + qast_helper, qast_string_helper, query_ast_from_user_text, QueryAst, +}; use serde_json::{json, Value as JsonValue}; use tantivy::schema::Value as TantivyValue; use tantivy::time::OffsetDateTime; @@ -60,8 +63,8 @@ 
async fn test_single_node_simple() -> anyhow::Result<()> { ]; test_sandbox.add_documents(docs.clone()).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("anthropomorphic", &["body"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("anthropomorphic", &["body"]), max_hits: 2, ..Default::default() }; @@ -103,8 +106,8 @@ async fn test_single_node_termset() -> anyhow::Result<()> { ]; test_sandbox.add_documents(docs.clone()).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("title: IN [beagle]", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("title: IN [beagle]", &[]), start_timestamp: None, end_timestamp: None, max_hits: 2, @@ -146,8 +149,8 @@ async fn test_single_search_with_snippet() -> anyhow::Result<()> { ]; test_sandbox.add_documents(docs.clone()).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("beagle", &["title", "body"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("beagle", &["title", "body"]), snippet_fields: vec!["title".to_string(), "body".to_string()], max_hits: 2, ..Default::default() @@ -184,9 +187,9 @@ async fn slop_search_and_check( query: &str, expected_num_match: u64, ) -> anyhow::Result<()> { - let query_ast = qast_helper(query, &["body"]); + let query_ast = qast_string_helper(query, &["body"]); let search_request = SearchRequest { - index_id: index_id.to_string(), + index_id_patterns: vec![index_id.to_string()], query_ast, max_hits: 5, ..Default::default() @@ -339,7 +342,7 @@ async fn test_single_node_several_splits() -> anyhow::Result<()> { let query_ast = query_ast_from_user_text("beagle", None); let query_ast_json = serde_json::to_string(&query_ast).unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), + index_id_patterns: vec![index_id.to_string()], 
query_ast: query_ast_json, max_hits: 6, ..Default::default() @@ -408,8 +411,8 @@ async fn test_single_node_filtering() -> anyhow::Result<()> { test_sandbox.add_documents(docs).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("info", &["body"]), start_timestamp: Some(start_timestamp + 10), end_timestamp: Some(start_timestamp + 20), max_hits: 15, @@ -432,8 +435,8 @@ async fn test_single_node_filtering() -> anyhow::Result<()> { // filter on time range [i64::MIN 20[ should only hit first 19 docs because of filtering let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("info", &["body"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("info", &["body"]), end_timestamp: Some(start_timestamp + 20), max_hits: 25, sort_fields: vec![SortField { @@ -455,8 +458,8 @@ async fn test_single_node_filtering() -> anyhow::Result<()> { // filter on tag, should return an error since no split is tagged let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("tag:foo AND info", &["body"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("tag:foo AND info", &["body"]), max_hits: 25, sort_fields: vec![SortField { field_name: "ts".to_string(), @@ -540,8 +543,8 @@ async fn single_node_search_sort_by_field( test_sandbox.add_documents(docs).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("city", &["description"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("city", &["description"]), max_hits: 15, sort_fields: vec![SortField { field_name: sort_by_field.to_string(), @@ -625,7 +628,7 @@ async fn test_sort_bm25() { let search_hits = |query: &str| { let query_ast_json = 
serde_json::to_string(&query_ast_from_user_text(query, None)).unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), + index_id_patterns: vec![index_id.to_string()], query_ast: query_ast_json, max_hits: 1_000, sort_fields: vec![SortField { @@ -717,7 +720,7 @@ async fn test_sort_by_static_and_dynamic_field() { let search_hits = |sort_field: &str, order: SortOrder| { let query_ast_json = serde_json::to_string(&QueryAst::MatchAll).unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), + index_id_patterns: vec![index_id.to_string()], query_ast: query_ast_json, max_hits: 1_000, sort_fields: vec![SortField { @@ -814,7 +817,7 @@ async fn test_sort_by_2_field() { |sort_field1: &str, order1: SortOrder, sort_field2: &str, order2: SortOrder| { let query_ast_json = serde_json::to_string(&QueryAst::MatchAll).unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), + index_id_patterns: vec![index_id.to_string()], query_ast: query_ast_json, max_hits: 1_000, sort_fields: vec![ @@ -897,8 +900,8 @@ async fn test_single_node_invalid_sorting_with_query() { test_sandbox.add_documents(docs).await.unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("city", &["description"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("city", &["description"]), max_hits: 15, sort_fields: vec![SortField { field_name: "description".to_string(), @@ -944,43 +947,37 @@ async fn test_single_node_split_pruning_by_tags() -> anyhow::Result<()> { test_sandbox.add_documents(docs).await?; } - let query_ast: String = qast_helper("owner:francois", &[]); + let query_ast: QueryAst = qast_helper("owner:francois", &[]); let selected_splits = list_relevant_splits( - index_uid.clone(), - &SearchRequest { - index_id: index_id.to_string(), - query_ast, - ..Default::default() - }, + vec![index_uid.clone()], + None, + None, + extract_tags_from_query(query_ast), 
&*test_sandbox.metastore(), ) .await?; assert!(selected_splits.is_empty()); - let query_ast: String = qast_helper("", &[]); + let query_ast: QueryAst = qast_helper("", &[]); let selected_splits = list_relevant_splits( - index_uid.clone(), - &SearchRequest { - index_id: index_id.to_string(), - query_ast, - ..Default::default() - }, + vec![index_uid.clone()], + None, + None, + extract_tags_from_query(query_ast), &*test_sandbox.metastore(), ) .await?; assert_eq!(selected_splits.len(), 2); - let query_ast: String = qast_helper("owner:francois OR owner:paul OR owner:adrien", &[]); + let query_ast: QueryAst = qast_helper("owner:francois OR owner:paul OR owner:adrien", &[]); let selected_splits = list_relevant_splits( - index_uid.clone(), - &SearchRequest { - index_id: index_id.to_string(), - query_ast, - ..Default::default() - }, + vec![index_uid.clone()], + None, + None, + extract_tags_from_query(query_ast), &*test_sandbox.metastore(), ) .await?; @@ -1011,8 +1008,8 @@ async fn test_search_util(test_sandbox: &TestSandbox, query: &str) -> Vec { .map(|split_meta| extract_split_and_footer_offsets(&split_meta.split_metadata)) .collect(); let request = quickwit_proto::SearchRequest { - index_id: test_sandbox.index_uid().index_id().to_string(), - query_ast: qast_helper(query, &[]), + index_id_patterns: vec![test_sandbox.index_uid().index_id().to_string()], + query_ast: qast_string_helper(query, &[]), max_hits: 100, ..Default::default() }; @@ -1331,8 +1328,8 @@ async fn test_single_node_aggregation() -> anyhow::Result<()> { test_sandbox.add_documents(docs.clone()).await?; let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("*", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("*", &[]), max_hits: 2, aggregation_request: Some(agg_req.to_string()), ..Default::default() @@ -1404,8 +1401,8 @@ async fn test_single_node_aggregation_missing_fast_field() { 
test_sandbox.add_documents(docs.clone()).await.unwrap(); let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("*", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("*", &[]), max_hits: 2, aggregation_request: Some(agg_req.to_string()), ..Default::default() @@ -1446,8 +1443,8 @@ async fn test_single_node_with_ip_field() -> anyhow::Result<()> { test_sandbox.add_documents(docs.clone()).await?; { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("*", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("*", &[]), max_hits: 10, ..Default::default() }; @@ -1462,8 +1459,8 @@ async fn test_single_node_with_ip_field() -> anyhow::Result<()> { } { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("10.10.11.125", &["host"]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("10.10.11.125", &["host"]), max_hits: 10, ..Default::default() }; @@ -1518,8 +1515,8 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { test_sandbox.add_documents(docs).await?; { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper( + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper( "datetime:[2023-01-10T15:13:36Z TO 2023-01-10T15:13:38Z}", &[], ), @@ -1537,8 +1534,8 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { } { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("status_code:[400 TO 401]", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("status_code:[400 TO 401]", &[]), max_hits: 10, ..Default::default() }; @@ -1553,8 +1550,8 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { } { let search_request = SearchRequest { - index_id: index_id.to_string(), - 
query_ast: qast_helper("host:[10.0.0.0 TO 10.255.255.255]", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("host:[10.0.0.0 TO 10.255.255.255]", &[]), max_hits: 10, ..Default::default() }; @@ -1569,8 +1566,8 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { } { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("latency:[100 TO *]", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("latency:[100 TO *]", &[]), max_hits: 10, ..Default::default() }; @@ -1585,8 +1582,8 @@ async fn test_single_node_range_queries() -> anyhow::Result<()> { } { let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("error_code:[-1 TO 1]", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("error_code:[-1 TO 1]", &[]), max_hits: 10, ..Default::default() }; @@ -1775,8 +1772,8 @@ async fn test_single_node_find_trace_ids_collector() { .to_string(); let search_request = SearchRequest { - index_id: index_id.to_string(), - query_ast: qast_helper("*", &[]), + index_id_patterns: vec![index_id.to_string()], + query_ast: qast_string_helper("*", &[]), aggregation_request: Some(aggregations), ..Default::default() }; diff --git a/quickwit/quickwit-serve/src/elastic_search_api/filter.rs b/quickwit/quickwit-serve/src/elastic_search_api/filter.rs index a113105a717..dbc7fbfcc11 100644 --- a/quickwit/quickwit-serve/src/elastic_search_api/filter.rs +++ b/quickwit/quickwit-serve/src/elastic_search_api/filter.rs @@ -27,6 +27,7 @@ use super::model::MultiSearchQueryParams; use crate::elastic_search_api::model::{ ElasticIngestOptions, ScrollQueryParams, SearchBody, SearchQueryParams, }; +use crate::search_api::extract_index_id_patterns; const BODY_LENGTH_LIMIT: Byte = byte_unit::Byte::from_bytes(1_000_000); const CONTENT_LENGTH_LIMIT: Byte = byte_unit::Byte::from_bytes(10 * 1024 * 1024); // 10MiB @@ 
-101,23 +102,10 @@ fn json_or_empty( #[utoipa::path(get, tag = "Search", path = "/{index}/_search")] pub(crate) fn elastic_index_search_filter( -) -> impl Filter + Clone { +) -> impl Filter, SearchQueryParams, SearchBody), Error = Rejection> + Clone +{ warp::path!("_elastic" / String / "_search") - .and_then(|comma_separated_indexes: String| async move { - if comma_separated_indexes.contains(',') { - return Err(warp::reject::custom(crate::rest::InvalidArgument(format!( - "Searching only one index is supported for now. Got \ - (`{comma_separated_indexes}`)" - )))); - } - let index = comma_separated_indexes.trim(); - if index.is_empty() { - return Err(warp::reject::custom(crate::rest::InvalidArgument( - "Missing index name.".to_string(), - ))); - } - Ok(index.to_string()) - }) + .and_then(extract_index_id_patterns) .and(warp::get().or(warp::post()).unify()) .and(serde_qs::warp::query(serde_qs::Config::default())) .and(json_or_empty()) diff --git a/quickwit/quickwit-serve/src/elastic_search_api/mod.rs b/quickwit/quickwit-serve/src/elastic_search_api/mod.rs index ad51542e2b1..2a1053c3365 100644 --- a/quickwit/quickwit-serve/src/elastic_search_api/mod.rs +++ b/quickwit/quickwit-serve/src/elastic_search_api/mod.rs @@ -118,10 +118,10 @@ mod tests { .expect_root_search() .with(predicate::function( |search_request: &quickwit_proto::SearchRequest| { - (search_request.index_id == "index-1" + (search_request.index_id_patterns == vec!["index-1".to_string()] && search_request.start_offset == 5 && search_request.max_hits == 20) - || (search_request.index_id == "index-2" + || (search_request.index_id_patterns == vec!["index-2".to_string()] && search_request.start_offset == 0 && search_request.max_hits == 10) }, @@ -162,7 +162,10 @@ mod tests { mock_search_service .expect_root_search() .returning(|search_request| { - if search_request.index_id == "index-1" { + if search_request + .index_id_patterns + .contains(&"index-1".to_string()) + { Ok(Default::default()) } else { 
Err(quickwit_search::SearchError::InternalError( @@ -305,15 +308,29 @@ mod tests { .await; assert_eq!(resp.status(), 400); let es_error: ElasticSearchError = serde_json::from_slice(resp.body()).unwrap(); - assert!(es_error.error.reason.unwrap().starts_with( - "Invalid argument: `_msearch` must define one `index` in the request header" - )); + assert_eq!( + es_error.error.reason.unwrap(), + "Invalid argument: `_msearch` request header must define at least one index." + ); } #[tokio::test] async fn test_msearch_api_return_400_with_multiple_indexes() { let config = Arc::new(NodeConfig::for_test()); - let mock_search_service = MockSearchService::new(); + let mut mock_search_service = MockSearchService::new(); + mock_search_service + .expect_root_search() + .returning(|search_request| { + if search_request.index_id_patterns + == vec!["index-1".to_string(), "index-2".to_string()] + { + Ok(Default::default()) + } else { + Err(quickwit_search::SearchError::InternalError( + "something bad happened".to_string(), + )) + } + }); let es_search_api_handler = super::elastic_api_handlers( config, Arc::new(mock_search_service), @@ -321,7 +338,7 @@ mod tests { ); let msearch_payload = r#" {"index": ["index-1", "index-2"]} - {"query":{"query_string":{"bad":"test"}}} + {"query":{"query_string":{"query":"test"}}} "#; let resp = warp::test::request() .path("/_elastic/_msearch") @@ -329,13 +346,7 @@ mod tests { .body(msearch_payload) .reply(&es_search_api_handler) .await; - assert_eq!(resp.status(), 400); - let es_error: ElasticSearchError = serde_json::from_slice(resp.body()).unwrap(); - assert!(es_error - .error - .reason - .unwrap() - .starts_with("Invalid argument: Searching only one index is supported for now.")); + assert_eq!(resp.status(), 200); } #[tokio::test] diff --git a/quickwit/quickwit-serve/src/elastic_search_api/rest_handler.rs b/quickwit/quickwit-serve/src/elastic_search_api/rest_handler.rs index bec9ba5e5cf..1f22d7051e9 100644 --- 
a/quickwit/quickwit-serve/src/elastic_search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elastic_search_api/rest_handler.rs @@ -29,7 +29,7 @@ use futures_util::StreamExt; use hyper::StatusCode; use itertools::Itertools; use quickwit_common::truncate_str; -use quickwit_config::NodeConfig; +use quickwit_config::{validate_index_id_pattern, NodeConfig}; use quickwit_proto::{ScrollRequest, SearchResponse, ServiceErrorCode}; use quickwit_query::query_ast::{QueryAst, UserInputQuery}; use quickwit_query::BooleanOperand; @@ -126,7 +126,7 @@ pub fn es_compat_index_multi_search_handler( } fn build_request_for_es_api( - index_id: String, + index_id_patterns: Vec, search_params: SearchQueryParams, search_body: SearchBody, ) -> Result { @@ -176,7 +176,7 @@ fn build_request_for_es_api( let scroll_ttl_secs: Option = scroll_duration.map(|duration| duration.as_secs() as u32); Ok(quickwit_proto::SearchRequest { - index_id, + index_id_patterns, query_ast: serde_json::to_string(&query_ast).expect("Failed to serialize QueryAst"), max_hits, start_offset, @@ -190,13 +190,13 @@ fn build_request_for_es_api( } async fn es_compat_index_search( - index_id: String, + index_id_patterns: Vec, search_params: SearchQueryParams, search_body: SearchBody, search_service: Arc, ) -> Result { let start_instant = Instant::now(); - let search_request = build_request_for_es_api(index_id, search_params, search_body)?; + let search_request = build_request_for_es_api(index_id_patterns, search_params, search_body)?; let search_response: SearchResponse = search_service.root_search(search_request).await?; let elapsed = start_instant.elapsed(); let mut search_response_rest: ElasticSearchResponse = @@ -242,20 +242,20 @@ async fn es_compat_index_multi_search( err )) })?; - if request_header.index.len() != 1 { - let message = if request_header.index.is_empty() { - "`_msearch` must define one `index` in the request header. 
Got none.".to_string() - } else { - format!( - "Searching only one index is supported for now. Got {:?}", - request_header.index - ) - }; + if request_header.index.is_empty() { return Err(ElasticSearchError::from(SearchError::InvalidArgument( - message, + "`_msearch` request header must define at least one index.".to_string(), ))); } - let index_id = request_header.index[0].clone(); + for index in &request_header.index { + validate_index_id_pattern(index).map_err(|err| { + SearchError::InvalidArgument(format!( + "Request header contains an invalid index: {}", + err + )) + })?; + } + let index_ids_patterns = request_header.index.clone(); let search_body = payload_lines .next() .ok_or_else(|| { @@ -271,7 +271,8 @@ async fn es_compat_index_multi_search( }) })?; let search_query_params = SearchQueryParams::from(request_header); - let es_request = build_request_for_es_api(index_id, search_query_params, search_body)?; + let es_request = + build_request_for_es_api(index_ids_patterns, search_query_params, search_body)?; search_requests.push(es_request); } let futures = search_requests.into_iter().map(|search_request| async { diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index 1ed44e8e52e..4f43e32b030 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -29,7 +29,8 @@ use quickwit_config::{ use quickwit_doc_mapper::{analyze_text, TokenizerConfig}; use quickwit_index_management::{IndexService, IndexServiceError}; use quickwit_metastore::{ - IndexMetadata, ListSplitsQuery, Metastore, MetastoreError, Split, SplitInfo, SplitState, + IndexMetadata, ListIndexesQuery, ListSplitsQuery, Metastore, MetastoreError, Split, SplitInfo, + SplitState, }; use quickwit_proto::IndexUid; use serde::de::DeserializeOwned; @@ -373,7 +374,9 @@ async fn get_indexes_metadatas( metastore: Arc, ) -> Result, MetastoreError> { 
info!("get-indexes-metadatas"); - metastore.list_indexes_metadatas().await + metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await } #[derive(Deserialize, utoipa::IntoParams, utoipa::ToSchema)] @@ -768,8 +771,10 @@ mod tests { use assert_json_diff::assert_json_include; use quickwit_common::uri::Uri; use quickwit_config::{SourceParams, VecSourceParams}; - use quickwit_indexing::mock_split; - use quickwit_metastore::{metastore_for_test, IndexMetadata, MetastoreError, MockMetastore}; + use quickwit_indexing::{mock_split, MockSplitBuilder}; + use quickwit_metastore::{ + metastore_for_test, IndexMetadata, ListIndexesQuery, MetastoreError, MockMetastore, + }; use quickwit_storage::StorageResolver; use serde::__private::from_utf8_lossy; use serde_json::Value as JsonValue; @@ -830,26 +835,26 @@ mod tests { #[tokio::test] async fn test_get_splits() { let mut metastore = MockMetastore::new(); + let index_metadata = + IndexMetadata::for_test("quickwit-demo-index", "ram:///indexes/quickwit-demo-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "quickwit-demo-index", - "ram:///indexes/quickwit-demo-index", - )) - }) + .returning(move |_index_id: &str| Ok(index_metadata.clone())) .times(2); metastore .expect_list_splits() - .returning(|list_split_query: ListSplitsQuery| { - if list_split_query.index_uid.index_id() == "quickwit-demo-index" + .returning(move |list_split_query: ListSplitsQuery| { + if list_split_query.index_uids.contains(&index_uid) && list_split_query.split_states == vec![SplitState::Published, SplitState::Staged] && list_split_query.time_range.start == Bound::Included(10) && list_split_query.time_range.end == Bound::Excluded(20) && list_split_query.create_timestamp.end == Bound::Excluded(2) { - return Ok(vec![mock_split("split_1")]); + return Ok(vec![MockSplitBuilder::new("split_1") + .with_index_uid(&index_uid) + .build()]); } 
Err(MetastoreError::InternalError { message: "".to_string(), @@ -897,25 +902,27 @@ mod tests { #[tokio::test] async fn test_describe_index() -> anyhow::Result<()> { let mut metastore = MockMetastore::new(); + let index_metadata = + IndexMetadata::for_test("quickwit-demo-index", "ram:///indexes/quickwit-demo-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .return_once(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - let split_1 = mock_split("split_1"); + .return_once(move |_index_id: &str| Ok(index_metadata)); + let split_1 = MockSplitBuilder::new("split_1") + .with_index_uid(&index_uid) + .build(); let split_1_time_range = split_1.split_metadata.time_range.clone().unwrap(); - let mut split_2 = mock_split("split_2"); + let mut split_2 = MockSplitBuilder::new("split_2") + .with_index_uid(&index_uid) + .build(); split_2.split_metadata.time_range = Some(RangeInclusive::new( split_1_time_range.start() - 10, split_1_time_range.end() + 10, )); metastore .expect_list_splits() - .return_once(|list_split_query: ListSplitsQuery| { - if list_split_query.index_uid.index_id() == "test-index" { + .return_once(move |list_split_query: ListSplitsQuery| { + if list_split_query.index_uids.contains(&index_uid) { return Ok(vec![split_1, split_2]); } Err(MetastoreError::InternalError { @@ -931,15 +938,15 @@ mod tests { ) .recover(recover_fn); let resp = warp::test::request() - .path("/indexes/test-index/describe") + .path("/indexes/quickwit-demo-index/describe") .reply(&index_management_handler) .await; assert_eq!(resp.status(), 200); let actual_response_json: JsonValue = serde_json::from_slice(resp.body()).unwrap(); let expected_response_json = serde_json::json!({ - "index_id": "test-index", - "index_uri": "ram:///indexes/test-index", + "index_id": "quickwit-demo-index", + "index_uri": "ram:///indexes/quickwit-demo-index", "num_published_splits": 2, "size_published_splits": 
1600, "num_published_docs": 20, @@ -956,18 +963,16 @@ mod tests { #[tokio::test] async fn test_get_all_splits() { let mut metastore = MockMetastore::new(); + let index_metadata = + IndexMetadata::for_test("quickwit-demo-index", "ram:///indexes/quickwit-demo-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .return_once(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "quickwit-demo-index", - "ram:///indexes/quickwit-demo-index", - )) - }); + .return_once(move |_index_id: &str| Ok(index_metadata)); metastore .expect_list_splits() - .return_once(|list_split_query: ListSplitsQuery| { - if list_split_query.index_uid.index_id() == "quickwit-demo-index" + .return_once(move |list_split_query: ListSplitsQuery| { + if list_split_query.index_uids.contains(&index_uid) && list_split_query.split_states.is_empty() && list_split_query.time_range.is_unbounded() && list_split_query.create_timestamp.is_unbounded() @@ -1046,12 +1051,14 @@ mod tests { #[tokio::test] async fn test_get_list_indexes() -> anyhow::Result<()> { let mut metastore = MockMetastore::new(); - metastore.expect_list_indexes_metadatas().return_once(|| { - Ok(vec![IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )]) - }); + metastore.expect_list_indexes_metadatas().return_once( + |_list_indexes_query: ListIndexesQuery| { + Ok(vec![IndexMetadata::for_test( + "test-index", + "ram:///indexes/test-index", + )]) + }, + ); let index_service = IndexService::new(Arc::new(metastore), StorageResolver::unconfigured()); let index_management_handler = super::index_management_handlers( Arc::new(index_service), @@ -1343,7 +1350,10 @@ mod tests { .reply(&index_management_handler) .await; assert_eq!(resp.status(), 200); - let indexes = metastore.list_indexes_metadatas().await.unwrap(); + let indexes = metastore + .list_indexes_metadatas(ListIndexesQuery::All) + .await + .unwrap(); assert!(indexes.is_empty()); } diff --git a/quickwit/quickwit-serve/src/lib.rs 
b/quickwit/quickwit-serve/src/lib.rs index f6bad75c365..1de8b2f5a67 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -75,8 +75,8 @@ use quickwit_ingest::{ }; use quickwit_janitor::{start_janitor_service, JanitorService}; use quickwit_metastore::{ - Metastore, MetastoreError, MetastoreEvent, MetastoreEventPublisher, MetastoreGrpcClient, - MetastoreResolver, RetryingMetastore, + ListIndexesQuery, Metastore, MetastoreError, MetastoreEvent, MetastoreEventPublisher, + MetastoreGrpcClient, MetastoreResolver, RetryingMetastore, }; use quickwit_opentelemetry::otlp::{OtlpGrpcLogsService, OtlpGrpcTracesService}; use quickwit_proto::control_plane::ControlPlaneServiceClient; @@ -693,7 +693,7 @@ async fn check_cluster_configuration( ); } let file_backed_indexes = metastore - .list_indexes_metadatas() + .list_indexes_metadatas(ListIndexesQuery::All) .await? .into_iter() .filter(|index_metadata| index_metadata.index_uri().protocol().is_file_storage()) @@ -721,7 +721,7 @@ mod tests { use chitchat::transport::ChannelTransport; use quickwit_cluster::{create_cluster_for_test, ClusterNode}; use quickwit_common::uri::Uri; - use quickwit_metastore::{metastore_for_test, IndexMetadata, MockMetastore}; + use quickwit_metastore::{metastore_for_test, IndexMetadata, ListIndexesQuery, MockMetastore}; use quickwit_proto::indexing::IndexingTask; use quickwit_search::Job; use tokio::sync::{mpsc, watch}; @@ -739,12 +739,14 @@ mod tests { .expect_uri() .return_const(Uri::for_test("file:///qwdata/indexes")); - metastore.expect_list_indexes_metadatas().return_once(|| { - Ok(vec![IndexMetadata::for_test( - "test-index", - "file:///qwdata/indexes/test-index", - )]) - }); + metastore.expect_list_indexes_metadatas().return_once( + |_list_indexes_query: ListIndexesQuery| { + Ok(vec![IndexMetadata::for_test( + "test-index", + "file:///qwdata/indexes/test-index", + )]) + }, + ); check_cluster_configuration(&services, &peer_seeds, Arc::new(metastore)) .await diff 
--git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index f0e75e11144..1c995d873c7 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -207,6 +207,12 @@ fn get_status_with_error(rejection: Rejection) -> ApiError { service_code: ServiceErrorCode::BadRequest, message: error.0.to_string(), } + } else if let Some(error) = rejection.find::() { + // Happens when the url path or request body contains invalid argument(s). + ApiError { + service_code: ServiceErrorCode::BadRequest, + message: error.0.to_string(), + } } else if let Some(error) = rejection.find::() { // Happens when the request body could not be deserialized correctly. ApiError { diff --git a/quickwit/quickwit-serve/src/search_api/mod.rs b/quickwit/quickwit-serve/src/search_api/mod.rs index 9eaa8fbc77a..9e9e323adb4 100644 --- a/quickwit/quickwit-serve/src/search_api/mod.rs +++ b/quickwit/quickwit-serve/src/search_api/mod.rs @@ -21,6 +21,7 @@ mod grpc_adapter; mod rest_handler; pub use self::grpc_adapter::GrpcSearchAdapter; +pub(crate) use self::rest_handler::extract_index_id_patterns; pub use self::rest_handler::{ search_get_handler, search_post_handler, search_stream_handler, SearchApi, SearchRequestQueryString, SortBy, @@ -32,11 +33,11 @@ mod tests { use std::sync::Arc; use futures::TryStreamExt; - use quickwit_indexing::mock_split; + use quickwit_indexing::MockSplitBuilder; use quickwit_metastore::{IndexMetadata, MockMetastore}; use quickwit_proto::search_service_server::SearchServiceServer; use quickwit_proto::{tonic, OutputFormat}; - use quickwit_query::query_ast::qast_helper; + use quickwit_query::query_ast::qast_string_helper; use quickwit_search::{ create_search_client_from_grpc_addr, root_search_stream, ClusterClient, MockSearchService, SearchError, SearchJobPlacer, SearchService, SearcherPool, @@ -66,7 +67,7 @@ mod tests { // This test aims at checking the client gRPC implementation. 
let request = quickwit_proto::SearchStreamRequest { index_id: "test-index".to_string(), - query_ast: qast_helper("test", &["body"]), + query_ast: qast_string_helper("test", &["body"]), snippet_fields: Vec::new(), start_timestamp: None, end_timestamp: None, @@ -75,17 +76,21 @@ mod tests { partition_by_field: None, }; let mut metastore = MockMetastore::new(); + let index_metadata = IndexMetadata::for_test("test-index", "ram:///indexes/test-index"); + let index_uid = index_metadata.index_uid.clone(); metastore .expect_index_metadata() - .returning(|_index_id: &str| { - Ok(IndexMetadata::for_test( - "test-index", - "ram:///indexes/test-index", - )) - }); - metastore - .expect_list_splits() - .returning(|_filter| Ok(vec![mock_split("split_1"), mock_split("split_2")])); + .returning(move |_index_id: &str| Ok(index_metadata.clone())); + metastore.expect_list_splits().returning(move |_filter| { + Ok(vec![ + MockSplitBuilder::new("split_1") + .with_index_uid(&index_uid) + .build(), + MockSplitBuilder::new("split_2") + .with_index_uid(&index_uid) + .build(), + ]) + }); let mut mock_search_service = MockSearchService::new(); let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); result_sender.send(Ok(quickwit_proto::LeafSearchStreamResponse { diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index 3abb2bd74a6..5fd8351e49a 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use futures::stream::StreamExt; use hyper::header::HeaderValue; use hyper::HeaderMap; +use quickwit_config::validate_index_id_pattern; use quickwit_proto::{OutputFormat, ServiceError, SortField, SortOrder}; use quickwit_query::query_ast::query_ast_from_user_text; use quickwit_search::{SearchError, SearchResponseRest, SearchService}; @@ -200,7 +201,7 @@ pub struct SearchRequestQueryString { } async 
fn search_endpoint( - index_id: String, + index_id_patterns: Vec<String>, search_request: SearchRequestQueryString, search_service: &dyn SearchService, ) -> Result { @@ -210,7 +211,7 @@ async fn search_endpoint( let query_ast = query_ast_from_user_text(&search_request.query, search_request.search_fields); let query_ast_json = serde_json::to_string(&query_ast)?; let search_request = quickwit_proto::SearchRequest { - index_id, + index_id_patterns, query_ast: query_ast_json, snippet_fields: search_request.snippet_fields.unwrap_or_default(), start_timestamp: search_request.start_timestamp, @@ -229,31 +230,51 @@ async fn search_endpoint( } fn search_get_filter( -) -> impl Filter<Extract = (String, SearchRequestQueryString), Error = Rejection> + Clone { +) -> impl Filter<Extract = (Vec<String>, SearchRequestQueryString), Error = Rejection> + Clone { warp::path!(String / "search") + .and_then(extract_index_id_patterns) .and(warp::get()) .and(serde_qs::warp::query(serde_qs::Config::default())) } fn search_post_filter( -) -> impl Filter<Extract = (String, SearchRequestQueryString), Error = Rejection> + Clone { +) -> impl Filter<Extract = (Vec<String>, SearchRequestQueryString), Error = Rejection> + Clone { warp::path!(String / "search") + .and_then(extract_index_id_patterns) .and(warp::post()) .and(warp::body::content_length_limit(1024 * 1024)) .and(warp::body::json()) } async fn search( - index_id: String, + index_id_patterns: Vec<String>, search_request: SearchRequestQueryString, search_service: Arc<dyn SearchService>, ) -> impl warp::Reply { - info!(index_id = %index_id, request =? search_request, "search"); + info!(request =? 
search_request, "search"); let body_format = search_request.format; - let result = search_endpoint(index_id, search_request, &*search_service).await; + let result = search_endpoint(index_id_patterns, search_request, &*search_service).await; make_json_api_response(result, body_format) } +pub(crate) async fn extract_index_id_patterns( + comma_separated_index_patterns: String, +) -> Result<Vec<String>, Rejection> { + let mut index_ids_patterns = Vec::new(); + for index_id_pattern in comma_separated_index_patterns.split(',') { + validate_index_id_pattern(index_id_pattern).map_err(|error| { + warp::reject::custom(crate::rest::InvalidArgument(error.to_string())) + })?; + index_ids_patterns.push(index_id_pattern.to_string()); + } + if index_ids_patterns.is_empty() { + return Err(warp::reject::custom(crate::rest::InvalidArgument( + "Missing index ID.".to_string(), + ))); + } + Ok(index_ids_patterns) +} + #[utoipa::path( get, tag = "Search", @@ -492,7 +513,7 @@ mod tests { #[tokio::test] async fn test_rest_search_api_route_post() { let rest_search_api_filter = search_post_filter(); - let (index, req) = warp::test::request() + let (indexes, req) = warp::test::request() .method("POST") .path("/quickwit-demo-index/search?query=*&max_hits=10") .json(&true) @@ -500,7 +521,44 @@ mod tests { .filter(&rest_search_api_filter) .await .unwrap(); - assert_eq!(&index, "quickwit-demo-index"); + assert_eq!(indexes, vec!["quickwit-demo-index".to_string()]); + assert_eq!( + &req, + &super::SearchRequestQueryString { + query: "*".to_string(), + search_fields: None, + start_timestamp: None, + max_hits: 10, + format: BodyFormat::default(), + sort_by: SortBy::default(), + aggs: Some(json!({"range":[]})), + ..Default::default() + } + ); + } + + #[tokio::test] + async fn test_rest_search_api_route_post_multi_indexes() { + let rest_search_api_filter = search_post_filter(); + let (indexes, req) = warp::test::request() + .method("POST") + .path( + 
"/quickwit-demo-index,quickwit-demo,quickwit-demo-index-*/search?query=*&\ + max_hits=10", + ) + .json(&true) + .body(r#"{"query": "*", "max_hits":10, "aggs": {"range":[]} }"#) + .filter(&rest_search_api_filter) + .await + .unwrap(); + assert_eq!( + indexes, + vec![ + "quickwit-demo-index".to_string(), + "quickwit-demo".to_string(), + "quickwit-demo-index-*".to_string() + ] + ); assert_eq!( &req, &super::SearchRequestQueryString { @@ -516,10 +574,30 @@ mod tests { ); } + #[tokio::test] + async fn test_rest_search_api_route_post_multi_indexes_bad_pattern() { + let rest_search_api_filter = search_post_filter(); + let bad_pattern_rejection = warp::test::request() + .method("POST") + .path("/quickwit-demo-index**/search?query=*&max_hits=10") + .json(&true) + .body(r#"{"query": "*", "max_hits":10, "aggs": {"range":[]} }"#) + .filter(&rest_search_api_filter) + .await + .unwrap_err(); + let rejection = bad_pattern_rejection + .find::<crate::rest::InvalidArgument>() + .unwrap(); + assert_eq!( + rejection.0, + "Index ID pattern `quickwit-demo-index**` is invalid. Patterns must not contain `**`." 
+ ); + } + #[tokio::test] async fn test_rest_search_api_route_simple() { let rest_search_api_filter = search_get_filter(); - let (index, req) = warp::test::request() + let (indexes, req) = warp::test::request() .path( "/quickwit-demo-index/search?query=*&end_timestamp=1450720000&max_hits=10&\ start_offset=22", @@ -527,7 +605,7 @@ mod tests { .filter(&rest_search_api_filter) .await .unwrap(); - assert_eq!(&index, "quickwit-demo-index"); + assert_eq!(indexes, vec!["quickwit-demo-index".to_string()]); assert_eq!( &req, &super::SearchRequestQueryString { @@ -547,7 +625,7 @@ mod tests { #[tokio::test] async fn test_rest_search_api_route_simple_default_num_hits_default_offset() { let rest_search_api_filter = search_get_filter(); - let (index, req) = warp::test::request() + let (indexes, req) = warp::test::request() .path( "/quickwit-demo-index/search?query=*&end_timestamp=1450720000&search_field=title,\ body", @@ -555,7 +633,7 @@ mod tests { .filter(&rest_search_api_filter) .await .unwrap(); - assert_eq!(&index, "quickwit-demo-index"); + assert_eq!(indexes, vec!["quickwit-demo-index".to_string()]); assert_eq!( &req, &super::SearchRequestQueryString { @@ -575,12 +653,12 @@ mod tests { #[tokio::test] async fn test_rest_search_api_route_simple_format() { let rest_search_api_filter = search_get_filter(); - let (index, req) = warp::test::request() + let (indexes, req) = warp::test::request() .path("/quickwit-demo-index/search?query=*&format=json") .filter(&rest_search_api_filter) .await .unwrap(); - assert_eq!(&index, "quickwit-demo-index"); + assert_eq!(indexes, vec!["quickwit-demo-index".to_string()]); assert_eq!( &req, &super::SearchRequestQueryString { @@ -796,8 +874,8 @@ mod tests { async fn test_rest_search_api_with_index_does_not_exist() -> anyhow::Result<()> { let mut mock_search_service = MockSearchService::new(); mock_search_service.expect_root_search().returning(|_| { - Err(SearchError::IndexDoesNotExist { - index_id: "not-found-index".to_string(), + 
Err(SearchError::IndexesDoNotExist { + index_id_patterns: vec!["not-found-index".to_string()], }) }); let rest_search_api_handler = search_handler(mock_search_service); @@ -991,4 +1069,57 @@ mod tests { assert_json_eq!(resp_json, expected_response_json); Ok(()) } + + #[tokio::test] + async fn test_rest_search_api_multi_indexes() { + { + let mut mock_search_service = MockSearchService::new(); + mock_search_service + .expect_root_search() + .with(predicate::function( + |search_request: &quickwit_proto::SearchRequest| { + search_request.index_id_patterns + == vec!["quickwit-demo-*".to_string(), "quickwit-demo2".to_string()] + }, + )) + .returning(|_| Ok(Default::default())); + let rest_search_api_handler = search_handler(mock_search_service); + assert_eq!( + warp::test::request() + .path("/quickwit-demo-*,quickwit-demo2/search?query=*") + .reply(&rest_search_api_handler) + .await + .status(), + 200 + ); + } + { + let mut mock_search_service = MockSearchService::new(); + mock_search_service + .expect_root_search() + .returning(|_| Ok(Default::default())); + let rest_search_api_handler = search_handler(mock_search_service); + assert_eq!( + warp::test::request() + .path("/*/search?query=*") + .reply(&rest_search_api_handler) + .await + .status(), + 200 + ); + let response = warp::test::request() + .path("/abc!/search?query=*") + .reply(&rest_search_api_handler) + .await; + println!("{:?}", response.body()); + assert_eq!( + warp::test::request() + .path("/abc!/search?query=*") + .reply(&rest_search_api_handler) + .await + .status(), + 400 + ); + } + } } diff --git a/quickwit/rest-api-tests/README.md b/quickwit/rest-api-tests/README.md index 0311ea07a93..a0742438e4b 100644 --- a/quickwit/rest-api-tests/README.md +++ b/quickwit/rest-api-tests/README.md @@ -1,8 +1,8 @@ # Rest API tests This directory is meant to test quickwit at the Rest API level. 
-It was originally meant to iterate over the elastic search compatibility API, -but can also be used as a convenient way to create integration tests. +It was initially meant to iterate over the elastic search compatibility API, +but it can also be used as a convenient way to create integration tests. # Setting up the Python environment @@ -25,19 +25,21 @@ pipenv install The test script is meant to target `elasticsearch` and `quickwit`. -When targetting quickwit, the script expects a fresh quickwit instance -to be running on `http://localhost:7280`. The data involved is small and +When targeting quickwit, the script expects a fresh quickwit instance +running on `http://localhost:7280`. The data involved is small, and running in DEBUG mode is fine. -```./rest_api_test.py --engine quickwit``` +```bash +./run_tests.py --engine quickwit +``` -When targetting elasticsearch, the script expects elastic to be running on +When targeting elasticsearch, the script expects elastic to be running on `http://localhost:9200`. -In both case, the test will take care of setting up, ingesting and tearing down the +In both cases, the test will take care of setting up, ingesting and tearing down the indexes involved. 
-```./rest_api_test.py --engine elasticsearch``` +```./run_tests.py --engine elasticsearch``` # Writing a new test suite diff --git a/quickwit/rest-api-tests/run_tests.py b/quickwit/rest-api-tests/run_tests.py index 2b08f5616ee..d2ac0f31ab9 100755 --- a/quickwit/rest-api-tests/run_tests.py +++ b/quickwit/rest-api-tests/run_tests.py @@ -83,8 +83,6 @@ def resolve_previous_result(c, previous_result): ] return c -print(resolve_previous_result({"hello": {"$previous": "val[\"scroll_id\"]"}}, {"scroll_id": "123"})) - def run_request_step(method, step, previous_result): assert method in {"GET", "POST", "PUT", "DELETE"} if "headers" not in step: @@ -104,7 +102,8 @@ def run_request_step(method, step, previous_result): kvargs = resolve_previous_result(kvargs, previous_result) ndjson = step.get("ndjson", None) if ndjson is not None: - kvargs["data"] = "\n".join([json.dumps(doc) for doc in ndjson]) + # Add a newline at the end to please elasticsearch -> "The bulk request must be terminated by a newline [\\n]". 
+ kvargs["data"] = "\n".join([json.dumps(doc) for doc in ndjson]) + "\n" kvargs.setdefault("headers")["Content-Type"] = "application/json" expected_status_code = step.get("status_code", 200) num_retries = step.get("num_retries", 0) diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/0001-muti_indices_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/0001-muti_indices_query.yaml new file mode 100644 index 00000000000..d5ee8dcc873 --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/0001-muti_indices_query.yaml @@ -0,0 +1,19 @@ +params: + q: "*" +expected: + hits: + total: + value: 4 + relation: "eq" + hits: + $expect: "len(val) == 4" +--- +params: + q: "actor.login:fmassot OR actor.login:guilload" +expected: + hits: + total: + value: 2 + relation: "eq" + hits: + $expect: "len(val) == 2" diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_ctx.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_ctx.yaml new file mode 100644 index 00000000000..966d8c3c9bf --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_ctx.yaml @@ -0,0 +1,2 @@ +method: [GET, POST] +endpoint: "gharchive-*/_search" diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.elasticsearch.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.elasticsearch.yaml new file mode 100644 index 00000000000..3ec78abd47a --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.elasticsearch.yaml @@ -0,0 +1,107 @@ +# Delete possibly remaining index +method: DELETE +endpoint: gharchive-1 +status_code: null +--- +# Delete possibly remaining index +method: DELETE +endpoint: gharchive-2 +status_code: null +--- +# Create index 1 +method: PUT +endpoint: gharchive-1 +json: { + "mappings": { + "properties": { + "id": { + "type": "text", + "store": true, + "norms": false, + 
"index_options": "docs" + }, + "type": { + "type": "text", + "store": true, + "norms": false, + "index_options": "docs", + "fielddata": true + }, + "actor": { + "properties": { + "id": { + "type": "long", + "store": true + }, + "login": { + "type": "text", + "store": true, + "norms": false, + "index_options": "docs" + } + } + }, + "created_at": { + "type": "date", + "store": true + } + } + } +} +--- +# Create index 2 +method: PUT +endpoint: gharchive-2 +json: { + "mappings": { + "properties": { + "id": { + "type": "text", + "store": true, + "norms": false, + "index_options": "docs" + }, + "type": { + "type": "text", + "store": true, + "norms": false, + "index_options": "docs", + "fielddata": true + }, + "actor": { + "properties": { + "id": { + "type": "long", + "store": true + }, + "login": { + "type": "text", + "store": true, + "norms": false, + "index_options": "docs" + } + } + }, + "created_at": { + "type": "date", + "store": true + } + } + } +} +--- +# Ingest documents in index 1 and 2 +method: POST +endpoint: _bulk +params: + refresh: "true" +headers: {"Content-Type": "application/json"} +ndjson: + - "index": { "_index": "gharchive-1" } + - {"id": 1, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 1, "login": "fmassot" } } + - "index": { "_index": "gharchive-1" } + - {"id": 2, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 2, "login": "fulmicoton" } } + - "index": { "_index": "gharchive-2" } + - {"id": 3, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 3, "login": "guilload" } } + - "index": { "_index": "gharchive-2" } + - {"id": 4, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 4, "login": "trinity" } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.quickwit.yaml new file mode 100644 index 00000000000..312d01e6899 --- 
/dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_setup.quickwit.yaml @@ -0,0 +1,80 @@ +# Delete possibly remaining index +method: DELETE +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/gharchive-1 +status_code: null +--- +method: DELETE +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/gharchive-2 +status_code: null +--- +# Create index 1 +method: POST +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/ +json: + version: "0.6" + index_id: gharchive-1 + doc_mapping: + index_field_presence: true + timestamp_field: created_at + mode: dynamic + field_mappings: + - name: created_at + type: datetime + fast: true + dynamic_mapping: + expand_dots: true + tokenizer: default + fast: + normalizer: lowercase + record: position +--- +# Create index 2 +method: POST +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/ +json: + version: "0.6" + index_id: gharchive-2 + doc_mapping: + index_field_presence: true + timestamp_field: created_at + mode: strict + field_mappings: + - name: created_at + type: datetime + fast: true + - name: id + type: u64 + fast: true + - name: type + type: text + fast: true + - name: actor + type: object + fast: true + field_mappings: + - name: id + type: u64 + fast: true + - name: login + type: text + fast: true +--- +# Ingest documents in index 1 and 2 +method: POST +endpoint: _bulk +params: + refresh: "true" +headers: {"Content-Type": "application/json"} +ndjson: + - "index": { "_index": "gharchive-1" } + - {"id": 1, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 1, "login": "fmassot" } } + - "index": { "_index": "gharchive-1" } + - {"id": 2, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 2, "login": "fulmicoton" } } + - "index": { "_index": "gharchive-2" } + - {"id": 3, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 3, "login": "guilload" } } + - "index": { "_index": "gharchive-2" } + - {"id": 
4, "created_at":"2015-02-01T00:00:14Z", "type": "CreateEvent", "actor": { "id": 4, "login": "trinity" } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.elasticsearch.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.elasticsearch.yaml new file mode 100644 index 00000000000..56a84a5d8dd --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.elasticsearch.yaml @@ -0,0 +1,5 @@ +method: DELETE +endpoint: gharchive-1 +--- +method: DELETE +endpoint: gharchive-2 diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.quickwit.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.quickwit.yaml new file mode 100644 index 00000000000..0c8d478188a --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/multi-indices/_teardown.quickwit.yaml @@ -0,0 +1,7 @@ +method: DELETE +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/gharchive-1 +--- +method: DELETE +api_root: http://localhost:7280/api/v1/ +endpoint: indexes/gharchive-2