Skip to content

Commit

Permalink
sort by two fast fields
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Jul 7, 2023
1 parent ae768bf commit 99411fa
Show file tree
Hide file tree
Showing 14 changed files with 574 additions and 223 deletions.
1 change: 1 addition & 0 deletions quickwit/quickwit-proto/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.type_attribute(".", "#[derive(Serialize, Deserialize, utoipa::ToSchema)]")
.type_attribute("IndexingTask", "#[derive(Eq, Hash)]")
.type_attribute("SearchRequest", "#[derive(Eq, Hash)]")
.type_attribute("SortField", "#[derive(Eq, Hash)]")
.type_attribute("DeleteQuery", "#[serde(default)]")
.field_attribute(
"DeleteQuery.start_timestamp",
Expand Down
37 changes: 27 additions & 10 deletions quickwit/quickwit-proto/protos/quickwit/search_api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,30 @@ message SearchRequest {
// deprecated tag field
reserved 8;

// Sort order
optional SortOrder sort_order = 9;
// deprecated `sort_order`
reserved 9;

// Sort by fast field. If unset sort by docid
// sort_by_field can be:
// - a field name
// - _score
// - None, in which case the hits will be sorted by (SplitId, doc_id).
optional string sort_by_field = 10;
// deprecated `sort_by_field`
reserved 10;

// json serialized aggregation_request
optional string aggregation_request = 11;

// Fields to extract snippet on
repeated string snippet_fields = 12;

// Optional sort by one or more fields (limited to 2 at the moment).
repeated SortField sort_fields = 14;
}

// One sort criterion of a search request: which field to sort on and in
// which order. `SearchRequest.sort_fields` carries a list of these.
message SortField {
// Name of the field to sort by.
// NOTE(review): the removed `sort_by_field` also accepted `_score`; confirm
// whether that still holds for this field.
string field_name = 1;
// Order applied to `field_name`.
SortOrder sort_order = 2;
}

enum SortOrder {
Expand Down Expand Up @@ -226,16 +235,13 @@ message PartialHit {
// - the split_id,
// - the segment_ord,
// - the doc id.
oneof sort_value {
uint64 u64 = 5;
int64 i64 = 6;
double f64 = 7;
bool boolean = 8;
}

// Deprecated
reserved 1;
// Room for eventual future sorted key types.
reserved 9 to 20;
reserved 12 to 20;
SortBy sort_value = 10;
SortBy sort_value2 = 11;

string split_id = 2;

Expand All @@ -247,6 +253,17 @@ message PartialHit {
uint32 doc_id = 4;
}

// Typed value of a single sort key. `PartialHit` embeds it twice
// (`sort_value` and `sort_value2`), one per sort field.
message SortBy {
// The sort key itself; exactly one of the typed variants is set, or none.
oneof sort_value {
uint64 u64 = 1;
int64 i64 = 2;
double f64 = 3;
bool boolean = 4;
}
// Room for eventual future sorted key types.
reserved 5 to 20;
}

message LeafSearchResponse {
// Total number of documents matched by the query.
uint64 num_hits = 1;
Expand Down
27 changes: 23 additions & 4 deletions quickwit/quickwit-proto/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use ulid::Ulid;
mod quickwit;
mod quickwit_indexing_api;
mod quickwit_metastore_api;
pub use partial_hit::SortValue;
pub use sort_by::SortValue;
use std::cmp::Ordering;

pub mod indexing_api {
Expand Down Expand Up @@ -187,7 +187,6 @@ pub fn convert_to_grpc_result<T, E: ServiceError>(
}

impl TryFrom<SearchStreamRequest> for SearchRequest {

type Error = anyhow::Error;

fn try_from(search_stream_req: SearchStreamRequest) -> Result<Self, Self::Error> {
Expand All @@ -197,7 +196,7 @@ impl TryFrom<SearchStreamRequest> for SearchRequest {
snippet_fields: search_stream_req.snippet_fields,
start_timestamp: search_stream_req.start_timestamp,
end_timestamp: search_stream_req.end_timestamp,
.. Default::default()
..Default::default()
})
}
}
Expand All @@ -212,7 +211,7 @@ impl TryFrom<DeleteQuery> for SearchRequest {
query_ast: delete_query.query_ast,
start_timestamp: delete_query.start_timestamp,
end_timestamp: delete_query.end_timestamp,
.. Default::default()
..Default::default()
})
}
}
Expand Down Expand Up @@ -464,7 +463,27 @@ pub fn query_ast_from_user_text(user_text: &str, default_fields: Option<Vec<Stri
// Prost imposes the derived PartialEq implementation.
// This is terrible because it means that Eq and PartialEq are not really in line with
// Ord's implementation in the presence of NaN.
impl PartialOrd for SortBy {
fn partial_cmp(&self, other: &SortBy) -> Option<Ordering> {
self.sort_value.partial_cmp(&other.sort_value)
}
}
impl Ord for SortBy {
fn cmp(&self, other: &SortBy) -> Ordering {
self.sort_value.cmp(&other.sort_value)
}
}
// Hand-written marker impls (no methods to provide).
// `Eq` is a supertrait of `Ord`, so it is needed for the `Ord` impl on `SortBy`.
impl Eq for SortBy {}
// `Copy` lets a `SortBy` be passed around by value without an explicit clone.
impl Copy for SortBy {}
impl From<SortValue> for SortBy {
fn from(sort_value: SortValue) -> Self {
SortBy {
sort_value: Some(sort_value),
}
}
}

// Hand-written marker impls for the oneof payload enum.
impl Copy for SortValue {}
// NOTE(review): `SortValue` has an `F64(f64)` variant, and NaN != NaN under the
// derived `PartialEq`, so this `Eq` claim only holds if NaN values never reach
// the type — confirm against the `Ord` impl and the producers of sort values.
impl Eq for SortValue {}

impl Ord for SortValue {
Expand Down
80 changes: 41 additions & 39 deletions quickwit/quickwit-proto/src/quickwit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,25 @@ pub struct SearchRequest {
/// The results with rank [start_offset..start_offset + max_hits) are returned.
#[prost(uint64, tag = "7")]
pub start_offset: u64,
/// Sort order
#[prost(enumeration = "SortOrder", optional, tag = "9")]
pub sort_order: ::core::option::Option<i32>,
/// Sort by fast field. If unset sort by docid
/// sort_by_field can be:
/// - a field name
/// - _score
/// - None, in which case the hits will be sorted by (SplitId, doc_id).
#[prost(string, optional, tag = "10")]
pub sort_by_field: ::core::option::Option<::prost::alloc::string::String>,
/// json serialized aggregation_request
#[prost(string, optional, tag = "11")]
pub aggregation_request: ::core::option::Option<::prost::alloc::string::String>,
/// Fields to extract snippet on
#[prost(string, repeated, tag = "12")]
pub snippet_fields: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
/// Optional sort by one or more fields (limited to 2 at the moment).
#[prost(message, repeated, tag = "14")]
pub sort_fields: ::prost::alloc::vec::Vec<SortField>,
}
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
#[derive(Eq, Hash)]
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SortField {
#[prost(string, tag = "1")]
pub field_name: ::prost::alloc::string::String,
#[prost(enumeration = "SortOrder", tag = "2")]
pub sort_order: i32,
}
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
#[allow(clippy::derive_partial_eq_without_eq)]
Expand Down Expand Up @@ -164,10 +167,25 @@ pub struct Hit {
/// Instead, it holds a document_uri which is enough information to
/// go and fetch the actual document data, by performing a `get_doc(...)`
/// request.
///
/// Value of the sorting key for the given document.
///
/// Quickwit only computes top-K of this sorting field.
/// If the user requested for a bottom-K of a given fast field, then quickwit simply
/// emits an decreasing mapping of this fast field.
///
/// In case of a tie, quickwit uses the increasing order of
/// - the split_id,
/// - the segment_ord,
/// - the doc id.
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct PartialHit {
#[prost(message, optional, tag = "10")]
pub sort_value: ::core::option::Option<SortBy>,
#[prost(message, optional, tag = "11")]
pub sort_value2: ::core::option::Option<SortBy>,
#[prost(string, tag = "2")]
pub split_id: ::prost::alloc::string::String,
/// (segment_ord, doc) form a tantivy DocAddress, which is sufficient to identify a document
Expand All @@ -177,43 +195,27 @@ pub struct PartialHit {
/// The DocId identifies a unique document at the scale of a tantivy segment.
#[prost(uint32, tag = "4")]
pub doc_id: u32,
/// Value of the sorting key for the given document.
///
/// Quickwit only computes top-K of this sorting field.
/// If the user requested for a bottom-K of a given fast field, then quickwit simply
/// emits an decreasing mapping of this fast field.
///
/// In case of a tie, quickwit uses the increasing order of
/// - the split_id,
/// - the segment_ord,
/// - the doc id.
#[prost(oneof = "partial_hit::SortValue", tags = "5, 6, 7, 8")]
pub sort_value: ::core::option::Option<partial_hit::SortValue>,
}
/// Nested message and enum types in `PartialHit`.
pub mod partial_hit {
/// Value of the sorting key for the given document.
///
/// Quickwit only computes top-K of this sorting field.
/// If the user requested for a bottom-K of a given fast field, then quickwit simply
/// emits an decreasing mapping of this fast field.
///
/// In case of a tie, quickwit uses the increasing order of
/// - the split_id,
/// - the segment_ord,
/// - the doc id.
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SortBy {
#[prost(oneof = "sort_by::SortValue", tags = "1, 2, 3, 4")]
pub sort_value: ::core::option::Option<sort_by::SortValue>,
}
/// Nested message and enum types in `SortBy`.
pub mod sort_by {
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
#[derive(Copy)]
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Oneof)]
pub enum SortValue {
#[prost(uint64, tag = "5")]
#[prost(uint64, tag = "1")]
U64(u64),
#[prost(int64, tag = "6")]
#[prost(int64, tag = "2")]
I64(i64),
#[prost(double, tag = "7")]
#[prost(double, tag = "3")]
F64(f64),
#[prost(bool, tag = "8")]
#[prost(bool, tag = "4")]
Boolean(bool),
}
}
Expand Down
3 changes: 2 additions & 1 deletion quickwit/quickwit-search/src/cluster_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ mod tests {

fn mock_partial_hit(split_id: &str, sort_value: u64, doc_id: u32) -> PartialHit {
PartialHit {
sort_value: Some(SortValue::U64(sort_value)),
sort_value: Some(SortValue::U64(sort_value).into()),
sort_value2: None,
split_id: split_id.to_string(),
segment_ord: 1,
doc_id,
Expand Down
Loading

0 comments on commit 99411fa

Please sign in to comment.