quickwit-oss · rdettai · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/docs/internals/ingest-v2.md b/docs/internals/ingest-v2.md
@@ -18,7 +18,3 @@ indexer:
 See [full configuration example](https://github.com/quickwit-oss/quickwit/blob/main/config/quickwit.yaml).
 
 The only way to use the ingest API V2 is to use the [bulk endpoint](../reference/es_compatible_api.md#_bulk--batch-ingestion-endpoint) of the Elasticsearch-compatible API. The native Quickwit API is not yet compatible with the ingest V2 API.
-
-## Caveats
-
-The `refresh` parameter is not yet supported on the ingest V2 API.
diff --git a/docs/overview/concepts/querying.md b/docs/overview/concepts/querying.md
@@ -105,3 +105,7 @@ Quickwit does caching in many places to deliver a highly performing query engine
 ### Scoring
 
 Quickwit supports sorting docs by their BM25 scores. In order to query by score, [fieldnorms](../../configuration/index-config.md#text-type) must be enabled for the field. By default, BM25 scoring is disabled to improve query latencies but it can be opt-in by setting the `sort_by` option to `_score` in queries.
+
+### Document ID
+
+Each document in Quickwit is assigned a unique document ID, which is the combination of the split ID and the Tantivy DocId within that split. You cannot assign a custom ID. This ID is used as the final sort key for every search query (after any explicitly specified sort values) to make the results deterministic.
diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md
@@ -67,7 +67,7 @@ POST api/v1/<index id>/search
 | `max_hits`        | `Integer`  | Maximum number of hits to return (by default 20) | `20` |
 | `search_field`    | `[String]` | Fields to search on if no field name is specified in the query. Comma-separated list, e.g. "field1,field2"  | index_config.search_settings.default_search_fields |
 | `snippet_fields`  | `[String]` | Fields to extract snippet on. Comma-separated list, e.g. "field1,field2"  | |
-| `sort_by`         | `[String]` | Fields to sort the query results on. You can sort by one or two fast fields or by BM25 `_score` (requires fieldnorms). By default, hits are sorted by their document ID. | |
+| `sort_by`         | `[String]` | Fields to sort the query results on. You can sort by one or two fast fields or by BM25 `_score` (requires fieldnorms). By default, hits are sorted in reverse order of their [document ID](/docs/overview/concepts/querying.md#document-id) (to show recent events first). | |
 | `format`          | `Enum`     | The output format. Allowed values are "json" or "pretty_json" | `pretty_json` |
 | `aggs`            | `JSON`     | The aggregations request. See the [aggregations doc](aggregation.md) for supported aggregations. | |
 

diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs
@@ -150,7 +150,7 @@ impl TestSandbox {
         })
     }
 
-    /// Adds documents.
+    /// Adds documents and waits for them to be indexed (creating a separate split).
     ///
     /// The documents are expected to be `JsonValue`.
     /// They can be created using the `serde_json::json!` macro.

diff --git a/quickwit/quickwit-search/src/tests.rs b/quickwit/quickwit-search/src/tests.rs
@@ -266,7 +266,7 @@ async fn test_slop_queries() {
 }
 
 // TODO remove me once `Iterator::is_sorted_by_key` is stabilized.
-fn is_sorted<E, I: Iterator<Item = E>>(mut it: I) -> bool
+fn is_reverse_sorted<E, I: Iterator<Item = E>>(mut it: I) -> bool
 where E: Ord {
     let mut previous_el = if let Some(first_el) = it.next() {
         first_el
@@ -275,7 +275,7 @@ where E: Ord {
         return true;
     };
     for next_el in it {
-        if next_el < previous_el {
+        if next_el > previous_el {
             return false;
         }
         previous_el = next_el;
@@ -284,7 +284,6 @@ where E: Ord {
 }
 
 #[tokio::test]
-#[cfg_attr(not(feature = "ci-test"), ignore)]
 async fn test_single_node_several_splits() -> anyhow::Result<()> {
     let index_id = "single-node-several-splits";
     let doc_mapping_yaml = r#"
@@ -324,17 +323,14 @@ async fn test_single_node_several_splits() -> anyhow::Result<()> {
     .await?;
     assert_eq!(single_node_result.num_hits, 20);
     assert_eq!(single_node_result.hits.len(), 6);
-    assert!(&single_node_result.hits[0].json.contains("Snoopy"));
-    assert!(&single_node_result.hits[1].json.contains("breed"));
-    assert!(is_sorted(single_node_result.hits.iter().flat_map(|hit| {
-        hit.partial_hit.as_ref().map(|partial_hit| {
-            (
-                partial_hit.sort_value,
-                partial_hit.split_id.as_str(),
-                partial_hit.doc_id,
-            )
-        })
-    })));
+    assert!(&single_node_result.hits[0].json.contains("breed"));
+    assert!(&single_node_result.hits[1].json.contains("Snoopy"));
+    let hit_keys = single_node_result.hits.iter().flat_map(|hit| {
+        hit.partial_hit
+            .as_ref()
+            .map(|partial_hit| (partial_hit.split_id.as_str(), partial_hit.doc_id as i32))
+    });
+    assert!(is_reverse_sorted(hit_keys));
     assert!(single_node_result.elapsed_time_micros > 10);
     assert!(single_node_result.elapsed_time_micros < 1_000_000);
     test_sandbox.assert_quit().await;