Skip to content

Commit

Permalink
add support for wildcard exclusion in index pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a committed Jan 25, 2024
1 parent 9eef3bb commit 5ff7387
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 29 deletions.
29 changes: 21 additions & 8 deletions quickwit/quickwit-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,23 @@ pub fn validate_identifier(label: &str, value: &str) -> anyhow::Result<()> {
/// Checks whether an index ID pattern conforms to Quickwit conventions.
/// Index ID patterns accept the same characters as identifiers AND accept `*`
/// chars to allow for glob-like patterns.
pub fn validate_index_id_pattern(pattern: &str) -> anyhow::Result<()> {
pub fn validate_index_id_pattern(pattern: &str, allow_negative: bool) -> anyhow::Result<()> {
static IDENTIFIER_REGEX_WITH_GLOB_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^[a-zA-Z\*][a-zA-Z0-9-_\.\*]{0,254}$")
.expect("regular expression should compile")
});
if !IDENTIFIER_REGEX_WITH_GLOB_PATTERN.is_match(pattern) {
static IDENTIFIER_REGEX_WITH_GLOB_PATTERN_NEGATIVE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^-?[a-zA-Z\*][a-zA-Z0-9-_\.\*]{0,254}$")
.expect("regular expression should compile")
});

let regex = if allow_negative {
&IDENTIFIER_REGEX_WITH_GLOB_PATTERN_NEGATIVE
} else {
&IDENTIFIER_REGEX_WITH_GLOB_PATTERN
};

if !regex.is_match(pattern) {
bail!(
"index ID pattern `{pattern}` is invalid: patterns must match the following regular \
expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{{0,254}}$`"
Expand Down Expand Up @@ -269,14 +280,16 @@ mod tests {

#[test]
fn test_validate_index_id_pattern() {
validate_index_id_pattern("*").unwrap();
validate_index_id_pattern("abc.*").unwrap();
validate_index_id_pattern("ab").unwrap_err();
validate_index_id_pattern("").unwrap_err();
validate_index_id_pattern("**").unwrap_err();
assert!(validate_index_id_pattern("foo!")
validate_index_id_pattern("*", false).unwrap();
validate_index_id_pattern("abc.*", false).unwrap();
validate_index_id_pattern("ab", false).unwrap_err();
validate_index_id_pattern("", false).unwrap_err();
validate_index_id_pattern("**", false).unwrap_err();
assert!(validate_index_id_pattern("foo!", false)
.unwrap_err()
.to_string()
.contains("index ID pattern `foo!` is invalid:"));
validate_index_id_pattern("-abc", true).unwrap();
validate_index_id_pattern("-abc", false).unwrap_err();
}
}
28 changes: 24 additions & 4 deletions quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,25 @@ impl MetastoreService for FileBackedMetastore {
// 2) Get each index metadata. Note that each get will take a read lock on
// `per_index_metastores`. Lock is released in 1) to let a concurrent task/thread to
// take a write lock on `per_index_metastores`.
let index_matcher =
build_regex_set_from_patterns(request.index_id_patterns).map_err(|error| {
let mut positive_patterns = Vec::new();
let mut negative_patterns = Vec::new();
for pattern in request.index_id_patterns {
if let Some(negative_pattern) = pattern.strip_prefix('-') {
negative_patterns.push(negative_pattern.to_string());
} else {
positive_patterns.push(pattern);
}
}
let positive_index_matcher =
build_regex_set_from_patterns(positive_patterns).map_err(|error| {
MetastoreError::Internal {
message: "failed to build RegexSet from index patterns`".to_string(),
cause: error.to_string(),
}
})?;

let negative_index_matcher =
build_regex_set_from_patterns(negative_patterns).map_err(|error| {
MetastoreError::Internal {
message: "failed to build RegexSet from index patterns`".to_string(),
cause: error.to_string(),
Expand All @@ -709,7 +726,10 @@ impl MetastoreService for FileBackedMetastore {
IndexState::Alive(_) => Some(index_id),
_ => None,
})
.filter(|index_id| index_matcher.is_match(index_id))
.filter(|index_id| {
positive_index_matcher.is_match(index_id)
&& !negative_index_matcher.is_match(index_id)
})
.cloned()
.collect()
};
Expand Down Expand Up @@ -951,7 +971,7 @@ fn build_regex_set_from_patterns(patterns: Vec<String>) -> anyhow::Result<RegexS
/// Converts the tokens into a valid regex.
fn build_regex_exprs_from_pattern(index_pattern: &str) -> anyhow::Result<String> {
// Note: consecutive '*' are not allowed in the pattern.
validate_index_id_pattern(index_pattern)?;
validate_index_id_pattern(index_pattern, false)?;
let mut re: String = String::new();
re.push('^');
re.push_str(&index_pattern.split('*').map(regex::escape).join(".*"));
Expand Down
80 changes: 67 additions & 13 deletions quickwit/quickwit-metastore/src/metastore/postgres/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1401,32 +1401,67 @@ fn tags_filter_expression_helper(tags: &TagFilterAst) -> Cond {
}

/// Builds a SQL query that returns indexes which match at least one pattern in
/// `index_id_patterns`. For each pattern, we check if the pattern is valid and replace `*` by `%`
/// `index_id_patterns`, and none of the patterns starting with '-'
///
/// For each pattern, we check if the pattern is valid and replace `*` by `%`
/// to build a SQL `LIKE` query.
fn build_index_id_patterns_sql_query(index_id_patterns: &[String]) -> anyhow::Result<String> {
if index_id_patterns.is_empty() {
let mut positive_patterns = Vec::new();
let mut negative_patterns = Vec::new();
for pattern in index_id_patterns {
if let Some(negative_pattern) = pattern.strip_prefix('-') {
negative_patterns.push(negative_pattern.to_string());
} else {
positive_patterns.push(pattern);
}
}

if positive_patterns.is_empty() {
anyhow::bail!("The list of index id patterns may not be empty.");
}
if index_id_patterns.iter().any(|pattern| pattern == "*") {

if index_id_patterns.iter().any(|pattern| pattern == "*") && negative_patterns.is_empty() {
return Ok("SELECT * FROM indexes".to_string());
}

let mut where_like_query = String::new();
for (index_id_pattern_idx, index_id_pattern) in index_id_patterns.iter().enumerate() {
validate_index_id_pattern(index_id_pattern).map_err(|error| MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
for (index_id_pattern_idx, index_id_pattern) in positive_patterns.iter().enumerate() {
validate_index_id_pattern(index_id_pattern, false).map_err(|error| {
MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
}
})?;
if index_id_pattern_idx != 0 {
where_like_query.push_str(" OR ");
}
if index_id_pattern.contains('*') {
let sql_pattern = index_id_pattern.replace('*', "%");
let _ = write!(where_like_query, "index_id LIKE '{sql_pattern}'");
} else {
let _ = write!(where_like_query, "index_id = '{index_id_pattern}'");
}
if index_id_pattern_idx < index_id_patterns.len() - 1 {
where_like_query.push_str(" OR ");
}
let mut negative_like_query = String::new();
for index_id_pattern in negative_patterns.iter() {
validate_index_id_pattern(index_id_pattern, false).map_err(|error| {
MetastoreError::Internal {
message: "failed to build list indexes query".to_string(),
cause: error.to_string(),
}
})?;
negative_like_query.push_str(" AND ");
if index_id_pattern.contains('*') {
let sql_pattern = index_id_pattern.replace('*', "%");
let _ = write!(negative_like_query, "index_id NOT LIKE '{sql_pattern}'");
} else {
let _ = write!(negative_like_query, "index_id <> '{index_id_pattern}'");
}
}
Ok(format!("SELECT * FROM indexes WHERE {where_like_query}"))

Ok(format!(
"SELECT * FROM indexes WHERE ({where_like_query}){negative_like_query}"
))
}

/// A postgres metastore factory
Expand Down Expand Up @@ -1868,16 +1903,16 @@ mod tests {
fn test_index_id_pattern_like_query() {
assert_eq!(
&build_index_id_patterns_sql_query(&["*-index-*-last*".to_string()]).unwrap(),
"SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%'"
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%')"
);
assert_eq!(
&build_index_id_patterns_sql_query(&[
"*-index-*-last*".to_string(),
"another-index".to_string()
])
.unwrap(),
"SELECT * FROM indexes WHERE index_id LIKE '%-index-%-last%' OR index_id = \
'another-index'"
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%' OR index_id = \
'another-index')"
);
assert_eq!(
&build_index_id_patterns_sql_query(&[
Expand All @@ -1896,5 +1931,24 @@ mod tests {
`*-index-*-&-last**` is invalid: patterns must match the following regular \
expression: `^[a-zA-Z\\*][a-zA-Z0-9-_\\.\\*]{0,254}$``"
);

assert_eq!(
&build_index_id_patterns_sql_query(&["*".to_string(), "-index-name".to_string()])
.unwrap(),
"SELECT * FROM indexes WHERE (index_id LIKE '%') AND index_id <> 'index-name'"
);

assert_eq!(
&build_index_id_patterns_sql_query(&[
"*-index-*-last*".to_string(),
"another-index".to_string(),
"-*-index-1-last*".to_string(),
"-index-2-last".to_string(),
])
.unwrap(),
"SELECT * FROM indexes WHERE (index_id LIKE '%-index-%-last%' OR index_id = \
'another-index') AND index_id NOT LIKE '%-index-1-last%' AND index_id <> \
'index-2-last'"
);
}
}
2 changes: 1 addition & 1 deletion quickwit/quickwit-opentelemetry/src/otlp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ pub fn extract_otel_traces_index_id_patterns_from_metadata(
if index_id_pattern.is_empty() {
continue;
}
validate_index_id_pattern(index_id_pattern).map_err(|error| {
validate_index_id_pattern(index_id_pattern, true).map_err(|error| {
Status::internal(format!(
"invalid index ID pattern in request header: {error}",
))
Expand Down
3 changes: 3 additions & 0 deletions quickwit/quickwit-proto/protos/quickwit/metastore.proto
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ message CreateIndexResponse {

message ListIndexesMetadataRequest {
reserved 1;
// List of patterns an index should match or not match to get considered
// An index must match at least one positive pattern (a pattern not starting
// with a '-'), and no negative pattern (a pattern starting with a '-').
repeated string index_id_patterns = 2;
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion quickwit/quickwit-search/src/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ pub fn check_all_index_metadata_found(
let mut index_ids: HashSet<&str> = index_id_patterns
.iter()
.map(|index_ptn| index_ptn.as_str())
.filter(|index_ptn| !index_ptn.contains('*'))
.filter(|index_ptn| !index_ptn.contains('*') && !index_ptn.starts_with('-'))
.collect();

if index_ids.is_empty() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ async fn es_compat_index_multi_search(
)));
}
for index in &request_header.index {
validate_index_id_pattern(index).map_err(|err| {
validate_index_id_pattern(index, true).map_err(|err| {
SearchError::InvalidArgument(format!(
"request header contains an invalid index: {}",
err
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-serve/src/search_api/rest_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub(crate) async fn extract_index_id_patterns(
let mut index_id_patterns = Vec::new();

for index_id_pattern in percent_decoded_comma_separated_index_id_patterns.split(',') {
validate_index_id_pattern(index_id_pattern)
validate_index_id_pattern(index_id_pattern, true)
.map_err(|error| crate::rest::InvalidArgument(error.to_string()))?;
index_id_patterns.push(index_id_pattern.to_string());
}
Expand Down

0 comments on commit 5ff7387

Please sign in to comment.