Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use JSON for concat field #4937

Merged
merged 2 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,6 @@ mod tests {

use super::DefaultDocMapper;
use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME;
use crate::default_doc_mapper::mapping_tree::value_to_pretokenized;
use crate::{
DefaultDocMapperBuilder, DocMapper, DocParsingError, DOCUMENT_SIZE_FIELD_NAME,
DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME,
Expand Down Expand Up @@ -1815,7 +1814,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1830,7 +1829,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
}

Expand All @@ -1853,7 +1852,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": false}"#,
vec![value_to_pretokenized(false).into()],
vec![false.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1868,7 +1867,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": true}"#,
vec![value_to_pretokenized(true).into()],
vec![true.into()],
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,9 @@ impl Default for QuickwitConcatenateOptions {
}
}

impl From<QuickwitConcatenateOptions> for TextOptions {
impl From<QuickwitConcatenateOptions> for JsonObjectOptions {
fn from(quickwit_text_options: QuickwitConcatenateOptions) -> Self {
let mut text_options = TextOptions::default();
let mut text_options = JsonObjectOptions::default();
let text_field_indexing = TextFieldIndexing::default()
.set_index_option(quickwit_text_options.indexing_options.record)
.set_fieldnorms(quickwit_text_options.indexing_options.fieldnorms)
Expand Down
37 changes: 11 additions & 26 deletions quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use tantivy::schema::{
BytesOptions, DateOptions, Field, IntoIpv6Addr, IpAddrOptions, JsonObjectOptions,
NumericOptions, OwnedValue as TantivyValue, SchemaBuilder, TextOptions,
};
use tantivy::tokenizer::{PreTokenizedString, Token};
use tantivy::TantivyDocument as Document;

use super::date_time_type::QuickwitDateTimeOptions;
Expand All @@ -54,20 +53,6 @@ pub enum LeafType {
Text(QuickwitTextOptions),
}

pub(crate) fn value_to_pretokenized<T: ToString>(val: T) -> PreTokenizedString {
let text = val.to_string();
PreTokenizedString {
text: text.clone(),
tokens: vec![Token {
offset_from: 0,
offset_to: 1,
position: 0,
text,
position_length: 1,
}],
}
}

enum MapOrArrayIter {
Array(std::vec::IntoIter<JsonValue>),
Map(serde_json::map::IntoIter),
Expand Down Expand Up @@ -161,12 +146,12 @@ pub(crate) fn map_primitive_json_to_tantivy(value: JsonValue) -> Option<TantivyV
match value {
JsonValue::Array(_) | JsonValue::Object(_) | JsonValue::Null => None,
JsonValue::String(text) => Some(TantivyValue::Str(text)),
JsonValue::Bool(val) => Some(value_to_pretokenized(val).into()),
JsonValue::Bool(val) => Some((val).into()),
JsonValue::Number(number) => {
if let Some(val) = u64::from_json_number(&number) {
Some(value_to_pretokenized(val).into())
Some((val).into())
} else {
i64::from_json_number(&number).map(|val| value_to_pretokenized(val).into())
i64::from_json_number(&number).map(|val| (val).into())
}
}
}
Expand Down Expand Up @@ -219,7 +204,7 @@ impl LeafType {
}
}

fn tantivy_string_value_from_json(
fn tantivy_value_from_json(
&self,
json_val: JsonValue,
) -> Result<impl Iterator<Item = TantivyValue>, String> {
Expand All @@ -233,16 +218,16 @@ impl LeafType {
}
LeafType::I64(numeric_options) => {
let val = i64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::U64(numeric_options) => {
let val = u64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::F64(_) => Err("unsuported concat type: f64".to_string()),
LeafType::Bool(_) => {
if let JsonValue::Bool(val) = json_val {
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
} else {
Err(format!("expected boolean, got `{json_val}`"))
}
Expand Down Expand Up @@ -313,7 +298,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(el_json_val.clone())
.tantivy_value_from_json(el_json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand All @@ -333,7 +318,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(json_val.clone())
.tantivy_value_from_json(json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand Down Expand Up @@ -982,8 +967,8 @@ fn build_mapping_tree_from_entries<'a>(
if mapping_node.branches.contains_key(name) {
bail!("duplicated field definition `{}`", name);
}
let text_options: TextOptions = options.clone().into();
let field = schema.add_text_field(name, text_options);
let text_options: JsonObjectOptions = options.clone().into();
let field = schema.add_json_field(name, text_options);
for sub_field in &options.concatenate_fields {
for matched_field in
mapping_node
Expand Down
8 changes: 1 addition & 7 deletions quickwit/quickwit-query/src/query_ast/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,7 @@ pub fn find_field_or_hit_dynamic<'a>(
};
let field_entry = schema.get_field_entry(field);
let typ = field_entry.field_type().value_type();
if path.is_empty() {
if typ == Type::Json {
return Err(InvalidQuery::JsonFieldRootNotSearchable {
full_path: full_path.to_string(),
});
}
} else if typ != Type::Json {
if !path.is_empty() && typ != Type::Json {
return Err(InvalidQuery::FieldDoesNotExist {
full_path: full_path.to_string(),
});
Expand Down
Loading