fix clippy lints from 1.80-1.81 (#2488)
* fix some clippy lints

* fix clippy::doc_lazy_continuation

* fix some lints for 1.82
trinity-1686a authored Sep 5, 2024
1 parent a206c3c commit 85395d9
Showing 30 changed files with 79 additions and 98 deletions.
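
Most of the hunks below reflow rustdoc bullet lists to satisfy clippy::doc_lazy_continuation, which fires when a continuation line of a doc-comment list item is neither joined nor indented under its bullet. A minimal illustrative sketch of the pattern (not taken from any file in this diff):

    /// Before: the second line reads as a lazy markdown continuation and trips the lint.
    /// - does one thing, and also
    /// does another thing.

    /// After: the continuation is joined (or indented under the bullet).
    /// - does one thing, and also does another thing.

The two test hunks (bitpacker and the optional-index tests) instead swap index-based loops for iterator enumeration, in the style of clippy::needless_range_loop.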
4 changes: 2 additions & 2 deletions bitpacker/src/bitpacker.rs
@@ -368,9 +368,9 @@ mod test {
  for start_idx in 0u32..32u32 {
  output.resize(len, 0);
  bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
- for i in 0..len {
+ for (i, output_byte) in output.iter().enumerate() {
  let expected = (start_idx + i as u32) & mask;
- assert_eq!(output[i], expected);
+ assert_eq!(*output_byte, expected);
  }
  }
  }
4 changes: 2 additions & 2 deletions columnar/src/column_index/optional_index/tests.rs
@@ -110,8 +110,8 @@ fn test_null_index(data: &[bool]) {
  .map(|(pos, _val)| pos as u32)
  .collect();
  let mut select_iter = null_index.select_cursor();
- for i in 0..orig_idx_with_value.len() {
- assert_eq!(select_iter.select(i as u32), orig_idx_with_value[i]);
+ for (i, expected) in orig_idx_with_value.iter().enumerate() {
+ assert_eq!(select_iter.select(i as u32), *expected);
  }

  let step_size = (orig_idx_with_value.len() / 100).max(1);
5 changes: 2 additions & 3 deletions columnar/src/column_values/u64_based/line.rs
@@ -125,9 +125,8 @@ impl Line {
  /// Returns a line that attemps to approximate a function
  /// f: i in 0..[ys.num_vals()) -> ys[i].
  ///
- /// - The approximation is always lower than the actual value.
- /// Or more rigorously, formally `f(i).wrapping_sub(ys[i])` is small
- /// for any i in [0..ys.len()).
+ /// - The approximation is always lower than the actual value. Or more rigorously, formally
+ /// `f(i).wrapping_sub(ys[i])` is small for any i in [0..ys.len()).
  /// - It computes without panicking for any value of it.
  ///
  /// This function is only invariable by translation if all of the
7 changes: 3 additions & 4 deletions columnar/src/columnar/merge/mod.rs
@@ -64,10 +64,9 @@ impl From<ColumnType> for ColumnTypeCategory {
  /// resulting columnar. When a required column is a numerical column type, one of two things can
  /// happen:
  /// - If the required column type is compatible with all of the input columnar, the resulsting
- /// merged
- /// columnar will simply coerce the input column and use the required column type.
- /// - If the required column type is incompatible with one of the input columnar, the merged
- /// will fail with an InvalidData error.
+ /// merged columnar will simply coerce the input column and use the required column type.
+ /// - If the required column type is incompatible with one of the input columnar, the merged will
+ /// fail with an InvalidData error.
  ///
  /// `merge_row_order` makes it possible to remove or reorder row in the resulting
  /// `Columnar` table.
3 changes: 1 addition & 2 deletions columnar/src/columnar/merge/term_merger.rs
@@ -35,8 +35,7 @@ impl<'a> Ord for HeapItem<'a> {
  ///
  /// The item yield is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the terms.
+ /// - a slice with the ordinal of the segments containing the terms.
  pub struct TermMerger<'a> {
  heap: BinaryHeap<HeapItem<'a>>,
  current_streamers: Vec<HeapItem<'a>>,
3 changes: 3 additions & 0 deletions query-grammar/src/infallible.rs
@@ -109,6 +109,9 @@ where F: nom::Parser<I, (O, ErrorList), Infallible> {
  move |input: I| match f.parse(input) {
  Ok((input, (output, _err))) => Ok((input, output)),
  Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
+ // old versions don't understand this is uninhabited and need the empty match to help,
+ // newer versions warn because this arm is unreachable (which it is indeed).
+ #[allow(unreachable_patterns)]
  Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {},
  }
  }
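
The empty `match val {}` arm kept above compiles because the error type is `Infallible`, which has no values. A standalone sketch of the same idea, with illustrative names:

    use std::convert::Infallible;

    // An uninhabited type can never be constructed, so an empty match over it
    // type-checks and documents that this branch is statically unreachable.
    fn never_happens(val: Infallible) -> ! {
        match val {}
    }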
6 changes: 2 additions & 4 deletions src/directory/directory.rs
@@ -102,10 +102,8 @@ fn retry_policy(is_blocking: bool) -> RetryPolicy {
  ///
  /// There are currently two implementations of `Directory`
  ///
- /// - The [`MMapDirectory`][crate::directory::MmapDirectory], this
- /// should be your default choice.
- /// - The [`RamDirectory`][crate::directory::RamDirectory], which
- /// should be used mostly for tests.
+ /// - The [`MMapDirectory`][crate::directory::MmapDirectory], this should be your default choice.
+ /// - The [`RamDirectory`][crate::directory::RamDirectory], which should be used mostly for tests.
  pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
  /// Opens a file and returns a boxed `FileHandle`.
  ///
7 changes: 3 additions & 4 deletions src/fastfield/facet_reader.rs
@@ -25,10 +25,9 @@ impl FacetReader {
  /// Creates a new `FacetReader`.
  ///
  /// A facet reader just wraps :
- /// - a `MultiValuedFastFieldReader` that makes it possible to
- /// access the list of facet ords for a given document.
- /// - a `TermDictionary` that helps associating a facet to
- /// an ordinal and vice versa.
+ /// - a `MultiValuedFastFieldReader` that makes it possible to access the list of facet ords for
+ /// a given document.
+ /// - a `TermDictionary` that helps associating a facet to an ordinal and vice versa.
  pub fn new(facet_column: StrColumn) -> FacetReader {
  FacetReader { facet_column }
  }
4 changes: 2 additions & 2 deletions src/future_result.rs
@@ -11,8 +11,8 @@ use crate::TantivyError;
  /// progress. Dropping the `FutureResult` does not cancel the task being executed
  /// either.
  ///
- /// - In a sync context, you can call `FutureResult::wait()`. The function
- /// does not rely on `block_on`.
+ /// - In a sync context, you can call `FutureResult::wait()`. The function does not rely on
+ /// `block_on`.
  /// - In an async context, you can call simply use `FutureResult` as a future.
  pub struct FutureResult<T> {
  inner: Inner<T>,
16 changes: 7 additions & 9 deletions src/index/index.rs
@@ -49,10 +49,8 @@ fn load_metas(
  /// Save the index meta file.
  /// This operation is atomic :
  /// Either
- /// - it fails, in which case an error is returned,
- /// and the `meta.json` remains untouched,
- /// - it succeeds, and `meta.json` is written
- /// and flushed.
+ /// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+ /// - it succeeds, and `meta.json` is written and flushed.
  ///
  /// This method is not part of tantivy's public API
  fn save_new_metas(
@@ -529,12 +527,12 @@ impl Index {
  /// `IndexWriter` on the system is accessing the index directory,
  /// it is safe to manually delete the lockfile.
  ///
- /// - `num_threads` defines the number of indexing workers that
- /// should work at the same time.
+ /// - `num_threads` defines the number of indexing workers that should work at the same time.
  ///
- /// - `overall_memory_budget_in_bytes` sets the amount of memory
- /// allocated for all indexing thread.
- /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
+ /// - `overall_memory_budget_in_bytes` sets the amount of memory allocated for all indexing
+ /// thread.
+ ///
+ /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
  ///
  /// # Errors
  /// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
3 changes: 1 addition & 2 deletions src/indexer/delete_queue.rs
@@ -179,8 +179,7 @@ impl DeleteCursor {
  /// Skips operations and position it so that
  /// - either all of the delete operation currently in the queue are consume and the next get
  /// will return `None`.
- /// - the next get will return the first operation with an
- /// `opstamp >= target_opstamp`.
+ /// - the next get will return the first operation with an `opstamp >= target_opstamp`.
  pub fn skip_to(&mut self, target_opstamp: Opstamp) {
  // TODO Can be optimize as we work with block.
  while self.is_behind_opstamp(target_opstamp) {
4 changes: 2 additions & 2 deletions src/indexer/merge_operation.rs
@@ -29,8 +29,8 @@ impl MergeOperationInventory {

  /// A `MergeOperation` has two roles.
  /// It carries all of the information required to describe a merge:
- /// - `target_opstamp` is the opstamp up to which we want to consume the
- /// delete queue and reflect their deletes.
+ /// - `target_opstamp` is the opstamp up to which we want to consume the delete queue and reflect
+ /// their deletes.
  /// - `segment_ids` is the list of segment to be merged.
  ///
  /// The second role is to ensure keep track of the fact that these
9 changes: 3 additions & 6 deletions src/indexer/segment_entry.rs
@@ -10,12 +10,9 @@ use crate::indexer::delete_queue::DeleteCursor;
  ///
  /// In addition to segment `meta`,
  /// it contains a few transient states
- /// - `alive_bitset` is a bitset describing
- /// documents that were alive during the commit
- /// itself.
- /// - `delete_cursor` is the position in the delete queue.
- /// Deletes happening before the cursor are reflected either
- /// in the .del file or in the `alive_bitset`.
+ /// - `alive_bitset` is a bitset describing documents that were alive during the commit itself.
+ /// - `delete_cursor` is the position in the delete queue. Deletes happening before the cursor are
+ /// reflected either in the .del file or in the `alive_bitset`.
  #[derive(Clone)]
  pub struct SegmentEntry {
  meta: SegmentMeta,
6 changes: 2 additions & 4 deletions src/indexer/segment_updater.rs
@@ -30,10 +30,8 @@ const NUM_MERGE_THREADS: usize = 4;
  /// Save the index meta file.
  /// This operation is atomic:
  /// Either
- /// - it fails, in which case an error is returned,
- /// and the `meta.json` remains untouched,
- /// - it success, and `meta.json` is written
- /// and flushed.
+ /// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+ /// - it success, and `meta.json` is written and flushed.
  ///
  /// This method is not part of tantivy's public API
  pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()> {
4 changes: 2 additions & 2 deletions src/lib.rs
@@ -125,8 +125,8 @@
  //!
  //! - **Searching**: [Searcher] searches the segments with anything that implements
  //! [Query](query::Query) and merges the results. The list of [supported
- //! queries](query::Query#implementors). Custom Queries are supported by implementing the
- //! [Query](query::Query) trait.
+ //! queries](query::Query#implementors). Custom Queries are supported by implementing the
+ //! [Query](query::Query) trait.
  //!
  //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored.
  //!
2 changes: 1 addition & 1 deletion src/postings/block_search.rs
@@ -18,7 +18,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
  /// # Assumption
  ///
  /// - The block is sorted. Some elements may appear several times. This is the case at the
- /// end of the last block for instance.
+ /// end of the last block for instance.
  /// - The target is assumed smaller or equal to the last element of the block.
  pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
  let mut start = 0;
13 changes: 4 additions & 9 deletions src/query/boolean_query/boolean_query.rs
@@ -5,15 +5,10 @@ use crate::schema::{IndexRecordOption, Term};
  /// The boolean query returns a set of documents
  /// that matches the Boolean combination of constituent subqueries.
  ///
- /// The documents matched by the boolean query are
- /// those which
- /// * match all of the sub queries associated with the
- /// `Must` occurrence
- /// * match none of the sub queries associated with the
- /// `MustNot` occurrence.
- /// * match at least one of the sub queries associated
- /// with the `Must` or `Should` occurrence.
- ///
+ /// The documents matched by the boolean query are those which
+ /// - match all of the sub queries associated with the `Must` occurrence
+ /// - match none of the sub queries associated with the `MustNot` occurrence.
+ /// - match at least one of the sub queries associated with the `Must` or `Should` occurrence.
  ///
  /// You can combine other query types and their `Occur`ances into one `BooleanQuery`
  ///
3 changes: 3 additions & 0 deletions src/query/phrase_prefix_query/phrase_prefix_scorer.rs
@@ -6,6 +6,9 @@ use crate::query::phrase_query::{intersection_count, PhraseScorer};
  use crate::query::Scorer;
  use crate::{DocId, Score};

+ // MultiPrefix is the larger variant, and also the one we expect most often. PhraseScorer is > 1kB
+ // though, it would be interesting to slim it down if possible.
+ #[allow(clippy::large_enum_variant)]
  enum PhraseKind<TPostings: Postings> {
  SinglePrefix {
  position_offset: u32,
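
For context, the usual alternative to `#[allow(clippy::large_enum_variant)]` is to box the oversized variant so the enum itself stays small; a hypothetical sketch, not the actual PhraseKind definition:

    // Hypothetical: boxing the large payload keeps the enum small, at the cost of
    // one extra allocation and a pointer indirection per scorer.
    struct BigScorerStub {
        state: [u8; 1024], // stand-in for the real >1 kB scorer state
    }
    enum PhraseKindBoxed {
        SinglePrefix { position_offset: u32 },
        MultiPrefix(Box<BigScorerStub>),
    }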
4 changes: 2 additions & 2 deletions src/query/phrase_query/phrase_scorer.rs
@@ -219,8 +219,8 @@ fn intersection_exists_with_slop(
  /// In contrast to the regular algorithm this solves some issues:
  /// - Keep track of the slop so far. Slop is a budget that is spent on the distance between terms.
  /// - When encountering a match between two positions, which position is the best match is unclear
- /// and depends on intersections afterwards, therefore this algorithm keeps left and right as
- /// matches, but only counts one.
+ /// and depends on intersections afterwards, therefore this algorithm keeps left and right as
+ /// matches, but only counts one.
  ///
  /// This algorithm may return an incorrect count in some cases (e.g. left, right expansion and is
  /// then matches both on the following term.)
8 changes: 4 additions & 4 deletions src/query/query.rs
@@ -115,10 +115,10 @@ impl<'a> EnableScoring<'a> {
  ///
  /// So to sum it up :
  /// - a `Query` is a recipe to define a set of documents as well the way to score them.
- /// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance
- /// hold statistics about the different term of the query. It is created by the query.
- /// - a [`Scorer`] is a cursor over the set of matching documents, for a specific
- /// [`SegmentReader`]. It is created by the [`Weight`].
+ /// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance hold
+ /// statistics about the different term of the query. It is created by the query.
+ /// - a [`Scorer`] is a cursor over the set of matching documents, for a specific [`SegmentReader`].
+ /// It is created by the [`Weight`].
  ///
  /// When implementing a new type of `Query`, it is normal to implement a
  /// dedicated `Query`, [`Weight`] and [`Scorer`].
6 changes: 3 additions & 3 deletions src/query/range_query/fast_field_range_doc_set.rs
@@ -49,10 +49,10 @@ pub(crate) struct RangeDocSet<T> {
  ///
  /// There are two patterns.
  /// - We do a full scan. => We can load large chunks. We don't know in advance if seek call
- /// will come, so we start with small chunks
+ /// will come, so we start with small chunks
  /// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we
- /// should load small chunks. When the seeks are small, we can employ the same strategy as on a
- /// full scan.
+ /// should load small chunks. When the seeks are small, we can employ the same strategy as on
+ /// a full scan.
  fetch_horizon: u32,
  /// Current batch of loaded docs.
  loaded_docs: VecCursor,
2 changes: 1 addition & 1 deletion src/schema/facet.rs
@@ -169,7 +169,7 @@ impl Facet {

  /// Extract path from the `Facet`.
  pub fn to_path(&self) -> Vec<&str> {
- self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
+ self.encoded_str().split(FACET_SEP_CHAR).collect()
  }

  /// This function is the inverse of Facet::from(&str).
3 changes: 1 addition & 2 deletions src/schema/field_entry.rs
@@ -12,8 +12,7 @@ use crate::schema::{
  ///
  /// It consists of
  /// - a field name
- /// - a field type, itself wrapping up options describing
- /// how the field should be indexed.
+ /// - a field type, itself wrapping up options describing how the field should be indexed.
  #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
  pub struct FieldEntry {
  name: String,
9 changes: 4 additions & 5 deletions src/schema/term.rs
@@ -639,12 +639,11 @@ mod tests {
  /// <field> + <type byte> + <value len>
  ///
  /// - <field> is a big endian encoded u32 field id
- /// - <type_byte>'s most significant bit expresses whether the term is a json term or not
- /// The remaining 7 bits are used to encode the type of the value.
- /// If this is a JSON term, the type is the type of the leaf of the json.
- ///
+ /// - <type_byte>'s most significant bit expresses whether the term is a json term or not The
+ /// remaining 7 bits are used to encode the type of the value. If this is a JSON term, the
+ /// type is the type of the leaf of the json.
  /// - <value> is, if this is not the json term, a binary representation specific to the type.
- /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
+ /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
  const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;

  #[test]
4 changes: 2 additions & 2 deletions src/schema/text_options.rs
@@ -189,8 +189,8 @@ impl TokenizerName {
  ///
  /// It defines
  /// - The amount of information that should be stored about the presence of a term in a document.
- /// Essentially, should we store the term frequency and/or the positions (See
- /// [`IndexRecordOption`]).
+ /// Essentially, should we store the term frequency and/or the positions (See
+ /// [`IndexRecordOption`]).
  /// - The name of the `Tokenizer` that should be used to process the field.
  /// - Flag indicating, if fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
  /// to `true`.
4 changes: 2 additions & 2 deletions src/store/mod.rs
@@ -25,8 +25,8 @@
  //! Most users should not access the `StoreReader` directly
  //! and should rely on either
  //!
- //! - at the segment level, the
- //! [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
+ //! - at the segment level, the [`SegmentReader`'s `doc`
+ //! method](../struct.SegmentReader.html#method.doc)
  //! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method

  mod compressors;
3 changes: 1 addition & 2 deletions src/termdict/fst_termdict/merger.rs
@@ -11,8 +11,7 @@ use crate::termdict::{TermOrdinal, TermStreamer};
  ///
  /// The item yielded is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the term.
+ /// - a slice with the ordinal of the segments containing the term.
  pub struct TermMerger<'a> {
  dictionaries: Vec<&'a TermDictionary>,
  union: Union<'a>,
3 changes: 1 addition & 2 deletions src/termdict/sstable_termdict/merger.rs
@@ -34,8 +34,7 @@ impl<'a> Ord for HeapItem<'a> {
  ///
  /// The item yield is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the terms.
+ /// - a slice with the ordinal of the segments containing the terms.
  pub struct TermMerger<'a> {
  heap: BinaryHeap<HeapItem<'a>>,
  current_streamers: Vec<HeapItem<'a>>,
14 changes: 6 additions & 8 deletions src/tokenizer/tokenizer_manager.rs
@@ -12,14 +12,12 @@ use crate::tokenizer::{
  ///
  /// By default, it is populated with the following managers.
  ///
- /// * `raw` : does not process nor tokenize the text.
- /// * `default` : Chops the text on according to whitespace and
- /// punctuation, removes tokens that are too long, and lowercases
- /// tokens
- /// * `en_stem` : Like `default`, but also applies stemming on the
- /// resulting tokens. Stemming can improve the recall of your
- /// search engine.
- /// * `whitespace` : Splits the text on whitespaces.
+ /// - `raw` : does not process nor tokenize the text.
+ /// - `default` : Chops the text on according to whitespace and punctuation, removes tokens that are
+ /// too long, and lowercases tokens.
+ /// - `en_stem` : Like `default`, but also applies stemming on the resulting tokens. Stemming can
+ /// improve the recall of your search engine.
+ /// - `whitespace` : Splits the text on whitespaces.
  #[derive(Clone)]
  pub struct TokenizerManager {
  tokenizers: Arc<RwLock<HashMap<String, TextAnalyzer>>>,
14 changes: 7 additions & 7 deletions stacker/src/memory_arena.rs
@@ -12,13 +12,13 @@
  //! # Limitations
  //!
  //! - Your object shall not implement `Drop`.
- //! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena
- //! is 4GB. *(Tantivy's indexer uses one arena per indexing thread.)*
- //! - The arena only works for objects much smaller than `1MB`.
- //! Allocating more than `1MB` at a time will result in a panic,
- //! and allocating a lot of large object (> 500KB) will result in a fragmentation.
- //! - Your objects are store in an unaligned fashion. For this reason,
- //! the API does not let you access them as references.
+ //! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena is 4GB. *(Tantivy's
+ //! indexer uses one arena per indexing thread.)*
+ //! - The arena only works for objects much smaller than `1MB`. Allocating more than `1MB` at a
+ //! time will result in a panic, and allocating a lot of large object (> 500KB) will result in a
+ //! fragmentation.
+ //! - Your objects are store in an unaligned fashion. For this reason, the API does not let you
+ //! access them as references.
  //!
  //! Instead, you store and access your data via `.write(...)` and `.read(...)`, which under the hood
  //! stores your object using `ptr::write_unaligned` and `ptr::read_unaligned`.
