Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parameter to limit the size of a query string #4320

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
"fast_field_cache_capacity": "10G",
"split_footer_cache_capacity": "1G",
"max_num_concurrent_split_streams": 120,
"max_num_concurrent_split_searches": 150
"max_num_concurrent_split_searches": 150,
"query_string_size_limit": "256B"
},
"jaeger": {
"enable_endpoint": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ fast_field_cache_capacity = "10G"
split_footer_cache_capacity = "1G"
max_num_concurrent_split_streams = 120
max_num_concurrent_split_searches = 150
query_string_size_limit = "256B"

[jaeger]
enable_endpoint = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ searcher:
split_footer_cache_capacity: 1G
max_num_concurrent_split_streams: 120
max_num_concurrent_split_searches: 150
query_string_size_limit: 256B

jaeger:
enable_endpoint: true
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-config/src/node_config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ pub struct SearcherConfig {
pub partial_request_cache_capacity: ByteSize,
pub max_num_concurrent_split_searches: usize,
pub max_num_concurrent_split_streams: usize,
pub query_string_size_limit: ByteSize,
// Strangely, if None, this will also have the effect of not forwarding
// to searcher.
// TODO document and fix if necessary.
Expand All @@ -187,6 +188,7 @@ impl Default for SearcherConfig {
max_num_concurrent_split_searches: 100,
aggregation_memory_limit: ByteSize::mb(500),
aggregation_bucket_limit: 65000,
query_string_size_limit: ByteSize::b(1024),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This limit is too low. For Jaeger, we need to make queries with at least 100 trace ID terms or even bigger ones. A trace ID is 128 bits, we represent it in hex string, so 2 characters for each byte, so we need 32 chars, and I'm not counting the field name size, which has 8 characters.

I feel like we should either put a limit on the number of terms, like 1K terms, or set a 100kb in terms of size.

@fulmicoton @guilload any thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I go ahead with increasing the default size of the terms to 100kb @fulmicoton @guilload ?

split_cache: None,
}
}
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-config/src/node_config/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ mod tests {
partial_request_cache_capacity: ByteSize::mb(64),
max_num_concurrent_split_searches: 150,
max_num_concurrent_split_streams: 120,
query_string_size_limit: ByteSize::b(256),
split_cache: None,
}
);
Expand Down
84 changes: 64 additions & 20 deletions quickwit/quickwit-search/src/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ use itertools::Itertools;
use quickwit_common::shared_consts::{DELETION_GRACE_PERIOD, SCROLL_BATCH_LEN};
use quickwit_common::uri::Uri;
use quickwit_common::PrettySample;
use quickwit_config::{build_doc_mapper, IndexConfig};
use quickwit_config::{build_doc_mapper, IndexConfig, SearcherConfig};
use quickwit_doc_mapper::tag_pruning::extract_tags_from_query;
use quickwit_doc_mapper::{DocMapper, DYNAMIC_FIELD_NAME};
use quickwit_doc_mapper::DYNAMIC_FIELD_NAME;
use quickwit_metastore::{
IndexMetadata, IndexMetadataResponseExt, ListIndexesMetadataResponseExt, ListSplitsRequestExt,
MetastoreServiceStreamSplitsExt, SplitMetadata,
Expand Down Expand Up @@ -164,13 +164,16 @@ type TimestampFieldOpt = Option<String>;
fn validate_request_and_build_metadatas(
indexes_metadata: &[IndexMetadata],
search_request: &SearchRequest,
searcher_config: &SearcherConfig,
) -> crate::Result<(TimestampFieldOpt, QueryAst, IndexesMetasForLeafSearch)> {
let mut metadatas_for_leaf: HashMap<IndexUid, IndexMetasForLeafSearch> = HashMap::new();
let query_ast: QueryAst = serde_json::from_str(&search_request.query_ast)
.map_err(|err| SearchError::InvalidQuery(err.to_string()))?;
let mut query_ast_resolved_opt: Option<QueryAst> = None;
let mut timestamp_field_opt: Option<String> = None;

validate_request(searcher_config, search_request)?;

for index_metadata in indexes_metadata {
let doc_mapper = build_doc_mapper(
&index_metadata.index_config.doc_mapping,
Expand Down Expand Up @@ -214,7 +217,24 @@ fn validate_request_and_build_metadatas(
}
}

validate_request(&*doc_mapper, search_request)?;
let schema = doc_mapper.schema();
if doc_mapper.timestamp_field_name().is_none()
&& (search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some())
{
return Err(SearchError::InvalidQuery(format!(
"the timestamp field is not set in index: {:?} definition but start-timestamp or \
end-timestamp are set in the query",
search_request.index_id_patterns
)));
}

validate_requested_snippet_fields(&schema, &search_request.snippet_fields)?;

validate_sort_by_fields_and_search_after(
&search_request.sort_fields,
&search_request.search_after,
&schema,
)?;

// Validates the query by effectively building it against the current schema.
doc_mapper.query(doc_mapper.schema(), &query_ast_resolved_for_index, true)?;
Expand Down Expand Up @@ -386,28 +406,17 @@ fn validate_sort_by_field(
}

fn validate_request(
doc_mapper: &dyn DocMapper,
searcher_config: &SearcherConfig,
search_request: &SearchRequest,
) -> crate::Result<()> {
let schema = doc_mapper.schema();
if doc_mapper.timestamp_field_name().is_none()
&& (search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some())
{
if search_request.query_ast.len() > searcher_config.query_string_size_limit.as_u64() as usize {
return Err(SearchError::InvalidQuery(format!(
"the timestamp field is not set in index: {:?} definition but start-timestamp or \
end-timestamp are set in the query",
search_request.index_id_patterns
"max size for query string is {} bytes, but got a query of size {}",
searcher_config.query_string_size_limit.as_u64(),
search_request.query_ast.len()
)));
}

validate_requested_snippet_fields(&schema, &search_request.snippet_fields)?;

validate_sort_by_fields_and_search_after(
&search_request.sort_fields,
&search_request.search_after,
&schema,
)?;

if let Some(agg) = search_request.aggregation_request.as_ref() {
let _aggs: QuickwitAggregations = serde_json::from_str(agg).map_err(|_err| {
let err = serde_json::from_str::<tantivy::aggregation::agg_req::Aggregations>(agg)
Expand Down Expand Up @@ -907,7 +916,11 @@ pub async fn root_search(
.map(|index_metadata| index_metadata.index_uid.clone())
.collect_vec();
let (timestamp_field_opt, query_ast_resolved, indexes_metas_for_leaf_search) =
validate_request_and_build_metadatas(&indexes_metadata, &search_request)?;
validate_request_and_build_metadatas(
&indexes_metadata,
&search_request,
&searcher_context.searcher_config,
)?;
search_request.query_ast = serde_json::to_string(&query_ast_resolved)?;

// convert search_after datetime values from input datetime format to nanos.
Expand Down Expand Up @@ -1514,6 +1527,7 @@ mod tests {
start_offset: 10,
..Default::default()
};
let searcher_config = SearcherConfig::default();
let index_metadata = IndexMetadata::for_test("test-index-1", "ram:///test-index-1");
let index_metadata_with_other_config =
index_metadata_for_multi_indexes_test("test-index-2", "ram:///test-index-2");
Expand All @@ -1531,6 +1545,7 @@ mod tests {
index_metadata_no_timestamp,
],
&search_request,
&searcher_config,
)
.unwrap();
assert_eq!(timestamp_field, Some("timestamp".to_string()));
Expand All @@ -1547,6 +1562,7 @@ mod tests {
start_offset: 10,
..Default::default()
};
let searcher_config = SearcherConfig::default();
let index_metadata_1 = IndexMetadata::for_test("test-index-1", "ram:///test-index-1");
let mut index_metadata_2 = IndexMetadata::for_test("test-index-2", "ram:///test-index-2");
let doc_mapping_json_2 = r#"{
Expand Down Expand Up @@ -1574,6 +1590,7 @@ mod tests {
let timestamp_field_different = validate_request_and_build_metadatas(
&[index_metadata_1, index_metadata_2],
&search_request,
&searcher_config,
)
.unwrap_err();
assert_eq!(
Expand All @@ -1592,6 +1609,7 @@ mod tests {
start_offset: 10,
..Default::default()
};
let searcher_config = SearcherConfig::default();
let index_metadata_1 = IndexMetadata::for_test("test-index-1", "ram:///test-index-1");
let mut index_metadata_2 = IndexMetadata::for_test("test-index-2", "ram:///test-index-2");
index_metadata_2
Expand All @@ -1601,6 +1619,7 @@ mod tests {
let timestamp_field_different = validate_request_and_build_metadatas(
&[index_metadata_1, index_metadata_2],
&search_request,
&searcher_config,
)
.unwrap_err();
assert_eq!(
Expand All @@ -1610,6 +1629,31 @@ mod tests {
);
}

#[test]
fn test_validate_request_and_build_metadatas_fail_with_large_query_string() {
let request_query_ast = qast_helper("body:xdvncprunkplzditmkiwlzlgvflgxtoomfwhoulbnttelvbolsyhjczjgsfxwkinitouwdxrhktbowuttacewdrdqxolaagnvllybdbfgfgutlkrpdxguhphudruszcopsflrtulyajbeknzvxlpybjchchbozkbrdnlumrobfzgiblesagdaxdvtxikvufhbnzhifhsaxujopxhylhwidliopghahihfwroidayzqqlexwtentdbeddxphwsnxuxnbmnqpcnbpgsjxmuixoidqkyysoyiieytlnnfeaknvmnvavvecgccwjnocudznpzznzietzxxeudrcachcygjxpretuslwnbbdxzxdknvfudhwajatoqjylnvzrwaazuqtezwvkgnrxchacxljepfggodniexrflvfpenqfbkonzvcpgwdcljpmwejhmtijodoeheeijpdxfunkcvtesuyxiejvzzzqrqtirszilmxlvbwolvcguylebilcdhuvhgwpccgoihglgnclsigoewenbejyqojmbkdodbofxogdfrmvgcdtizkfypdnqntyhtsbymiwwmkjovlgcukolyizfvpipxycqoxzcsqstjygqxiwzzwbbibqcgudypxaywawdghzhjbunpynoumybslzikkqsnayoerdrobhupuhbetmwxlngbwliycpvobaxcistnkgbiwquyuudqontzoqufqvrydrowtequlwwxihyjechkhpspzswxssuikonwjdvvnfyxrtqehostebdktuakiyhilmwobkqkjifzzozhpgvpaygvncsvkhdtwgoreezfkxcebcckiuckzbvsokwvqrrprtdzuzvoaiyzhwihsdfuormefriswonrpkofxmxerngutjqnxuialtpkpivrwwomnhnvgfscinoezhpaibvwlyjeutjkrhgwrfmfvcwpraoqvndicyswucgffnneuwglufushjaibpxwgvwjgmqvlucqrnlujelltd", &[]);
let search_request = quickwit_proto::search::SearchRequest {
index_id_patterns: vec!["test-index".to_string()],
query_ast: serde_json::to_string(&request_query_ast).unwrap(),
max_hits: 10,
start_offset: 10,
..Default::default()
};
let searcher_config = SearcherConfig::default();
let index_metadata = IndexMetadata::for_test("test-index-1", "ram:///test-index-1");
let query_over_size_limit = validate_request_and_build_metadatas(
&[index_metadata],
&search_request,
&searcher_config,
)
.unwrap_err();

assert_eq!(
query_over_size_limit.to_string(),
"max size for query string is 1024 bytes, but got a query of size 1130"
);
}

#[test]
fn test_convert_sort_datetime_value() {
let mut sort_value = SortValue::U64(1617000000000000000);
Expand Down