fix clippy lints from 1.80-1.81 (#2488)
* fix some clippy lints

* fix clippy::doc_lazy_continuation

* fix some lints for 1.82
trinity-1686a authored Sep 5, 2024
1 parent a206c3c commit 85395d9
Showing 30 changed files with 79 additions and 98 deletions.
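
Most of the hunks below reflow rustdoc bullet lists to satisfy clippy::doc_lazy_continuation, which fires when a continuation line of a doc-comment list item is neither joined nor indented under its bullet. A minimal illustrative sketch of the pattern (not taken from any file in this diff):

    /// Before: the second line reads as a lazy markdown continuation and trips the lint.
    /// - does one thing, and also
    /// does another thing.

    /// After: the continuation is joined (or indented under the bullet).
    /// - does one thing, and also does another thing.

The two test hunks (bitpacker and the optional-index tests) instead swap index-based loops for iterator enumeration, in the style of clippy::needless_range_loop.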
4 changes: 2 additions & 2 deletions bitpacker/src/bitpacker.rs
@@ -368,9 +368,9 @@ mod test {
  for start_idx in 0u32..32u32 {
  output.resize(len, 0);
  bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
- for i in 0..len {
+ for (i, output_byte) in output.iter().enumerate() {
  let expected = (start_idx + i as u32) & mask;
- assert_eq!(output[i], expected);
+ assert_eq!(*output_byte, expected);
  }
  }
  }
4 changes: 2 additions & 2 deletions columnar/src/column_index/optional_index/tests.rs
@@ -110,8 +110,8 @@ fn test_null_index(data: &[bool]) {
  .map(|(pos, _val)| pos as u32)
  .collect();
  let mut select_iter = null_index.select_cursor();
- for i in 0..orig_idx_with_value.len() {
- assert_eq!(select_iter.select(i as u32), orig_idx_with_value[i]);
+ for (i, expected) in orig_idx_with_value.iter().enumerate() {
+ assert_eq!(select_iter.select(i as u32), *expected);
  }

  let step_size = (orig_idx_with_value.len() / 100).max(1);
5 changes: 2 additions & 3 deletions columnar/src/column_values/u64_based/line.rs
@@ -125,9 +125,8 @@ impl Line {
  /// Returns a line that attemps to approximate a function
  /// f: i in 0..[ys.num_vals()) -> ys[i].
  ///
- /// - The approximation is always lower than the actual value.
- /// Or more rigorously, formally `f(i).wrapping_sub(ys[i])` is small
- /// for any i in [0..ys.len()).
+ /// - The approximation is always lower than the actual value. Or more rigorously, formally
+ /// `f(i).wrapping_sub(ys[i])` is small for any i in [0..ys.len()).
  /// - It computes without panicking for any value of it.
  ///
  /// This function is only invariable by translation if all of the
7 changes: 3 additions & 4 deletions columnar/src/columnar/merge/mod.rs
@@ -64,10 +64,9 @@ impl From<ColumnType> for ColumnTypeCategory {
  /// resulting columnar. When a required column is a numerical column type, one of two things can
  /// happen:
  /// - If the required column type is compatible with all of the input columnar, the resulsting
- /// merged
- /// columnar will simply coerce the input column and use the required column type.
- /// - If the required column type is incompatible with one of the input columnar, the merged
- /// will fail with an InvalidData error.
+ /// merged columnar will simply coerce the input column and use the required column type.
+ /// - If the required column type is incompatible with one of the input columnar, the merged will
+ /// fail with an InvalidData error.
  ///
  /// `merge_row_order` makes it possible to remove or reorder row in the resulting
  /// `Columnar` table.
3 changes: 1 addition & 2 deletions columnar/src/columnar/merge/term_merger.rs
@@ -35,8 +35,7 @@ impl<'a> Ord for HeapItem<'a> {
  ///
  /// The item yield is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the terms.
+ /// - a slice with the ordinal of the segments containing the terms.
  pub struct TermMerger<'a> {
  heap: BinaryHeap<HeapItem<'a>>,
  current_streamers: Vec<HeapItem<'a>>,
3 changes: 3 additions & 0 deletions query-grammar/src/infallible.rs
@@ -109,6 +109,9 @@ where F: nom::Parser<I, (O, ErrorList), Infallible> {
  move |input: I| match f.parse(input) {
  Ok((input, (output, _err))) => Ok((input, output)),
  Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
+ // old versions don't understand this is uninhabited and need the empty match to help,
+ // newer versions warn because this arm is unreachable (which it is indeed).
+ #[allow(unreachable_patterns)]
  Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {},
  }
  }
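
The empty `match val {}` arm kept above compiles because the error type is `Infallible`, which has no values. A standalone sketch of the same idea, with illustrative names:

    use std::convert::Infallible;

    // An uninhabited type can never be constructed, so an empty match over it
    // type-checks and documents that this branch is statically unreachable.
    fn never_happens(val: Infallible) -> ! {
        match val {}
    }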
6 changes: 2 additions & 4 deletions src/directory/directory.rs
@@ -102,10 +102,8 @@ fn retry_policy(is_blocking: bool) -> RetryPolicy {
  ///
  /// There are currently two implementations of `Directory`
  ///
- /// - The [`MMapDirectory`][crate::directory::MmapDirectory], this
- /// should be your default choice.
- /// - The [`RamDirectory`][crate::directory::RamDirectory], which
- /// should be used mostly for tests.
+ /// - The [`MMapDirectory`][crate::directory::MmapDirectory], this should be your default choice.
+ /// - The [`RamDirectory`][crate::directory::RamDirectory], which should be used mostly for tests.
  pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
  /// Opens a file and returns a boxed `FileHandle`.
  ///
7 changes: 3 additions & 4 deletions src/fastfield/facet_reader.rs
@@ -25,10 +25,9 @@ impl FacetReader {
  /// Creates a new `FacetReader`.
  ///
  /// A facet reader just wraps :
- /// - a `MultiValuedFastFieldReader` that makes it possible to
- /// access the list of facet ords for a given document.
- /// - a `TermDictionary` that helps associating a facet to
- /// an ordinal and vice versa.
+ /// - a `MultiValuedFastFieldReader` that makes it possible to access the list of facet ords for
+ /// a given document.
+ /// - a `TermDictionary` that helps associating a facet to an ordinal and vice versa.
  pub fn new(facet_column: StrColumn) -> FacetReader {
  FacetReader { facet_column }
  }
4 changes: 2 additions & 2 deletions src/future_result.rs
@@ -11,8 +11,8 @@ use crate::TantivyError;
  /// progress. Dropping the `FutureResult` does not cancel the task being executed
  /// either.
  ///
- /// - In a sync context, you can call `FutureResult::wait()`. The function
- /// does not rely on `block_on`.
+ /// - In a sync context, you can call `FutureResult::wait()`. The function does not rely on
+ /// `block_on`.
  /// - In an async context, you can call simply use `FutureResult` as a future.
  pub struct FutureResult<T> {
  inner: Inner<T>,
16 changes: 7 additions & 9 deletions src/index/index.rs
@@ -49,10 +49,8 @@ fn load_metas(
  /// Save the index meta file.
  /// This operation is atomic :
  /// Either
- /// - it fails, in which case an error is returned,
- /// and the `meta.json` remains untouched,
- /// - it succeeds, and `meta.json` is written
- /// and flushed.
+ /// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+ /// - it succeeds, and `meta.json` is written and flushed.
  ///
  /// This method is not part of tantivy's public API
  fn save_new_metas(
@@ -529,12 +527,12 @@ impl Index {
  /// `IndexWriter` on the system is accessing the index directory,
  /// it is safe to manually delete the lockfile.
  ///
- /// - `num_threads` defines the number of indexing workers that
- /// should work at the same time.
+ /// - `num_threads` defines the number of indexing workers that should work at the same time.
  ///
- /// - `overall_memory_budget_in_bytes` sets the amount of memory
- /// allocated for all indexing thread.
- /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
+ /// - `overall_memory_budget_in_bytes` sets the amount of memory allocated for all indexing
+ /// thread.
+ ///
+ /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
  ///
  /// # Errors
  /// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
3 changes: 1 addition & 2 deletions src/indexer/delete_queue.rs
@@ -179,8 +179,7 @@ impl DeleteCursor {
  /// Skips operations and position it so that
  /// - either all of the delete operation currently in the queue are consume and the next get
  /// will return `None`.
- /// - the next get will return the first operation with an
- /// `opstamp >= target_opstamp`.
+ /// - the next get will return the first operation with an `opstamp >= target_opstamp`.
  pub fn skip_to(&mut self, target_opstamp: Opstamp) {
  // TODO Can be optimize as we work with block.
  while self.is_behind_opstamp(target_opstamp) {
4 changes: 2 additions & 2 deletions src/indexer/merge_operation.rs
@@ -29,8 +29,8 @@ impl MergeOperationInventory {

  /// A `MergeOperation` has two roles.
  /// It carries all of the information required to describe a merge:
- /// - `target_opstamp` is the opstamp up to which we want to consume the
- /// delete queue and reflect their deletes.
+ /// - `target_opstamp` is the opstamp up to which we want to consume the delete queue and reflect
+ /// their deletes.
  /// - `segment_ids` is the list of segment to be merged.
  ///
  /// The second role is to ensure keep track of the fact that these
9 changes: 3 additions & 6 deletions src/indexer/segment_entry.rs
@@ -10,12 +10,9 @@ use crate::indexer::delete_queue::DeleteCursor;
  ///
  /// In addition to segment `meta`,
  /// it contains a few transient states
- /// - `alive_bitset` is a bitset describing
- /// documents that were alive during the commit
- /// itself.
- /// - `delete_cursor` is the position in the delete queue.
- /// Deletes happening before the cursor are reflected either
- /// in the .del file or in the `alive_bitset`.
+ /// - `alive_bitset` is a bitset describing documents that were alive during the commit itself.
+ /// - `delete_cursor` is the position in the delete queue. Deletes happening before the cursor are
+ /// reflected either in the .del file or in the `alive_bitset`.
  #[derive(Clone)]
  pub struct SegmentEntry {
  meta: SegmentMeta,
6 changes: 2 additions & 4 deletions src/indexer/segment_updater.rs
@@ -30,10 +30,8 @@ const NUM_MERGE_THREADS: usize = 4;
  /// Save the index meta file.
  /// This operation is atomic:
  /// Either
- /// - it fails, in which case an error is returned,
- /// and the `meta.json` remains untouched,
- /// - it success, and `meta.json` is written
- /// and flushed.
+ /// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+ /// - it success, and `meta.json` is written and flushed.
  ///
  /// This method is not part of tantivy's public API
  pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()> {
4 changes: 2 additions & 2 deletions src/lib.rs
@@ -125,8 +125,8 @@
  //!
  //! - **Searching**: [Searcher] searches the segments with anything that implements
  //! [Query](query::Query) and merges the results. The list of [supported
- //! queries](query::Query#implementors). Custom Queries are supported by implementing the
- //! [Query](query::Query) trait.
+ //! queries](query::Query#implementors). Custom Queries are supported by implementing the
+ //! [Query](query::Query) trait.
  //!
  //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored.
  //!
2 changes: 1 addition & 1 deletion src/postings/block_search.rs
@@ -18,7 +18,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
  /// # Assumption
  ///
  /// - The block is sorted. Some elements may appear several times. This is the case at the
- /// end of the last block for instance.
+ /// end of the last block for instance.
  /// - The target is assumed smaller or equal to the last element of the block.
  pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
  let mut start = 0;
13 changes: 4 additions & 9 deletions src/query/boolean_query/boolean_query.rs
@@ -5,15 +5,10 @@ use crate::schema::{IndexRecordOption, Term};
  /// The boolean query returns a set of documents
  /// that matches the Boolean combination of constituent subqueries.
  ///
- /// The documents matched by the boolean query are
- /// those which
- /// * match all of the sub queries associated with the
- /// `Must` occurrence
- /// * match none of the sub queries associated with the
- /// `MustNot` occurrence.
- /// * match at least one of the sub queries associated
- /// with the `Must` or `Should` occurrence.
- ///
+ /// The documents matched by the boolean query are those which
+ /// - match all of the sub queries associated with the `Must` occurrence
+ /// - match none of the sub queries associated with the `MustNot` occurrence.
+ /// - match at least one of the sub queries associated with the `Must` or `Should` occurrence.
  ///
  /// You can combine other query types and their `Occur`ances into one `BooleanQuery`
  ///
3 changes: 3 additions & 0 deletions src/query/phrase_prefix_query/phrase_prefix_scorer.rs
@@ -6,6 +6,9 @@ use crate::query::phrase_query::{intersection_count, PhraseScorer};
  use crate::query::Scorer;
  use crate::{DocId, Score};

+ // MultiPrefix is the larger variant, and also the one we expect most often. PhraseScorer is > 1kB
+ // though, it would be interesting to slim it down if possible.
+ #[allow(clippy::large_enum_variant)]
  enum PhraseKind<TPostings: Postings> {
  SinglePrefix {
  position_offset: u32,
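
For context, the usual alternative to `#[allow(clippy::large_enum_variant)]` is to box the oversized variant so the enum itself stays small; a hypothetical sketch, not the actual PhraseKind definition:

    // Hypothetical: boxing the large payload keeps the enum small, at the cost of
    // one extra allocation and a pointer indirection per scorer.
    struct BigScorerStub {
        state: [u8; 1024], // stand-in for the real >1 kB scorer state
    }
    enum PhraseKindBoxed {
        SinglePrefix { position_offset: u32 },
        MultiPrefix(Box<BigScorerStub>),
    }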
4 changes: 2 additions & 2 deletions src/query/phrase_query/phrase_scorer.rs
@@ -219,8 +219,8 @@ fn intersection_exists_with_slop(
  /// In contrast to the regular algorithm this solves some issues:
  /// - Keep track of the slop so far. Slop is a budget that is spent on the distance between terms.
  /// - When encountering a match between two positions, which position is the best match is unclear
- /// and depends on intersections afterwards, therefore this algorithm keeps left and right as
- /// matches, but only counts one.
+ /// and depends on intersections afterwards, therefore this algorithm keeps left and right as
+ /// matches, but only counts one.
  ///
  /// This algorithm may return an incorrect count in some cases (e.g. left, right expansion and is
  /// then matches both on the following term.)
8 changes: 4 additions & 4 deletions src/query/query.rs
@@ -115,10 +115,10 @@ impl<'a> EnableScoring<'a> {
  ///
  /// So to sum it up :
  /// - a `Query` is a recipe to define a set of documents as well the way to score them.
- /// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance
- /// hold statistics about the different term of the query. It is created by the query.
- /// - a [`Scorer`] is a cursor over the set of matching documents, for a specific
- /// [`SegmentReader`]. It is created by the [`Weight`].
+ /// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance hold
+ /// statistics about the different term of the query. It is created by the query.
+ /// - a [`Scorer`] is a cursor over the set of matching documents, for a specific [`SegmentReader`].
+ /// It is created by the [`Weight`].
  ///
  /// When implementing a new type of `Query`, it is normal to implement a
  /// dedicated `Query`, [`Weight`] and [`Scorer`].
6 changes: 3 additions & 3 deletions src/query/range_query/fast_field_range_doc_set.rs
@@ -49,10 +49,10 @@ pub(crate) struct RangeDocSet<T> {
  ///
  /// There are two patterns.
  /// - We do a full scan. => We can load large chunks. We don't know in advance if seek call
- /// will come, so we start with small chunks
+ /// will come, so we start with small chunks
  /// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we
- /// should load small chunks. When the seeks are small, we can employ the same strategy as on a
- /// full scan.
+ /// should load small chunks. When the seeks are small, we can employ the same strategy as on
+ /// a full scan.
  fetch_horizon: u32,
  /// Current batch of loaded docs.
  loaded_docs: VecCursor,
2 changes: 1 addition & 1 deletion src/schema/facet.rs
@@ -169,7 +169,7 @@ impl Facet {

  /// Extract path from the `Facet`.
  pub fn to_path(&self) -> Vec<&str> {
- self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
+ self.encoded_str().split(FACET_SEP_CHAR).collect()
  }

  /// This function is the inverse of Facet::from(&str).
3 changes: 1 addition & 2 deletions src/schema/field_entry.rs
@@ -12,8 +12,7 @@ use crate::schema::{
  ///
  /// It consists of
  /// - a field name
- /// - a field type, itself wrapping up options describing
- /// how the field should be indexed.
+ /// - a field type, itself wrapping up options describing how the field should be indexed.
  #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
  pub struct FieldEntry {
  name: String,
9 changes: 4 additions & 5 deletions src/schema/term.rs
@@ -639,12 +639,11 @@ mod tests {
  /// <field> + <type byte> + <value len>
  ///
  /// - <field> is a big endian encoded u32 field id
- /// - <type_byte>'s most significant bit expresses whether the term is a json term or not
- /// The remaining 7 bits are used to encode the type of the value.
- /// If this is a JSON term, the type is the type of the leaf of the json.
- ///
+ /// - <type_byte>'s most significant bit expresses whether the term is a json term or not The
+ /// remaining 7 bits are used to encode the type of the value. If this is a JSON term, the
+ /// type is the type of the leaf of the json.
  /// - <value> is, if this is not the json term, a binary representation specific to the type.
- /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
+ /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
  const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;

  #[test]
4 changes: 2 additions & 2 deletions src/schema/text_options.rs
@@ -189,8 +189,8 @@ impl TokenizerName {
  ///
  /// It defines
  /// - The amount of information that should be stored about the presence of a term in a document.
- /// Essentially, should we store the term frequency and/or the positions (See
- /// [`IndexRecordOption`]).
+ /// Essentially, should we store the term frequency and/or the positions (See
+ /// [`IndexRecordOption`]).
  /// - The name of the `Tokenizer` that should be used to process the field.
  /// - Flag indicating, if fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
  /// to `true`.
4 changes: 2 additions & 2 deletions src/store/mod.rs
@@ -25,8 +25,8 @@
  //! Most users should not access the `StoreReader` directly
  //! and should rely on either
  //!
- //! - at the segment level, the
- //! [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
+ //! - at the segment level, the [`SegmentReader`'s `doc`
+ //! method](../struct.SegmentReader.html#method.doc)
  //! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method

  mod compressors;
3 changes: 1 addition & 2 deletions src/termdict/fst_termdict/merger.rs
@@ -11,8 +11,7 @@ use crate::termdict::{TermOrdinal, TermStreamer};
  ///
  /// The item yielded is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the term.
+ /// - a slice with the ordinal of the segments containing the term.
  pub struct TermMerger<'a> {
  dictionaries: Vec<&'a TermDictionary>,
  union: Union<'a>,
3 changes: 1 addition & 2 deletions src/termdict/sstable_termdict/merger.rs
@@ -34,8 +34,7 @@ impl<'a> Ord for HeapItem<'a> {
  ///
  /// The item yield is actually a pair with
  /// - the term
- /// - a slice with the ordinal of the segments containing
- /// the terms.
+ /// - a slice with the ordinal of the segments containing the terms.
  pub struct TermMerger<'a> {
  heap: BinaryHeap<HeapItem<'a>>,
  current_streamers: Vec<HeapItem<'a>>,
14 changes: 6 additions & 8 deletions src/tokenizer/tokenizer_manager.rs
@@ -12,14 +12,12 @@ use crate::tokenizer::{
  ///
  /// By default, it is populated with the following managers.
  ///
- /// * `raw` : does not process nor tokenize the text.
- /// * `default` : Chops the text on according to whitespace and
- /// punctuation, removes tokens that are too long, and lowercases
- /// tokens
- /// * `en_stem` : Like `default`, but also applies stemming on the
- /// resulting tokens. Stemming can improve the recall of your
- /// search engine.
- /// * `whitespace` : Splits the text on whitespaces.
+ /// - `raw` : does not process nor tokenize the text.
+ /// - `default` : Chops the text on according to whitespace and punctuation, removes tokens that are
+ /// too long, and lowercases tokens.
+ /// - `en_stem` : Like `default`, but also applies stemming on the resulting tokens. Stemming can
+ /// improve the recall of your search engine.
+ /// - `whitespace` : Splits the text on whitespaces.
  #[derive(Clone)]
  pub struct TokenizerManager {
  tokenizers: Arc<RwLock<HashMap<String, TextAnalyzer>>>,
14 changes: 7 additions & 7 deletions stacker/src/memory_arena.rs
@@ -12,13 +12,13 @@
  //! # Limitations
  //!
  //! - Your object shall not implement `Drop`.
- //! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena
- //! is 4GB. *(Tantivy's indexer uses one arena per indexing thread.)*
- //! - The arena only works for objects much smaller than `1MB`.
- //! Allocating more than `1MB` at a time will result in a panic,
- //! and allocating a lot of large object (> 500KB) will result in a fragmentation.
- //! - Your objects are store in an unaligned fashion. For this reason,
- //! the API does not let you access them as references.
+ //! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena is 4GB. *(Tantivy's
+ //! indexer uses one arena per indexing thread.)*
+ //! - The arena only works for objects much smaller than `1MB`. Allocating more than `1MB` at a
+ //! time will result in a panic, and allocating a lot of large object (> 500KB) will result in a
+ //! fragmentation.
+ //! - Your objects are store in an unaligned fashion. For this reason, the API does not let you
+ //! access them as references.
  //!
  //! Instead, you store and access your data via `.write(...)` and `.read(...)`, which under the hood
  //! stores your object using `ptr::write_unaligned` and `ptr::read_unaligned`.
