Skip to content

Commit

Permalink
Remove lindera tantivy dep, clean.
Browse files Browse the repository at this point in the history
  • Loading branch information
fmassot committed Jul 11, 2023
1 parent 5ee45c4 commit b789e59
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 296 deletions.
191 changes: 2 additions & 189 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 0 additions & 10 deletions quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ json_comments = "0.2"
libz-sys = "1.1.8"
lindera-core = "0.26.0"
lindera-dictionary = "0.26.0"
lindera-tantivy = { git = "https://github.com/quickwit-oss/lindera-tantivy", rev = "bfe8b8d", features = ["ipadic", "ipadic-compress", "cc-cedict", "cc-cedict-compress", "ko-dic", "ko-dic-compress"] }
lindera-tokenizer = "0.26.0"
lru = "0.10"
matches = "0.1.9"
Expand Down Expand Up @@ -235,12 +234,6 @@ tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "3c30066", d
"zstd-compression",
"quickwit",
] }
# tantivy-tokenizer-api is later patched for the `tantivy` git repository. To avoid
# getting the error "patch for `tantivy-tokenizer-api` in `https://github.com/quickwit-oss/tantivy`
# points to the same source, but patches must point to different sources", we patch
# with URL "https://github.com/quickwit-oss/tantivy?rev=3c30066".
# See https://github.com/rust-lang/cargo/issues/5478.
tantivy-tokenizer-api = { git = "https://github.com/quickwit-oss/tantivy?rev=3c30066" }

# This is actually not used directly; the goal is to fix the version
# used by reqwest.
Expand Down Expand Up @@ -420,6 +413,3 @@ sasl2-sys = { git = "https://github.com/quickwit-oss/rust-sasl/", rev = "daca921
#tracing-log = { git = "https://github.com/trinity-1686a/tracing.git", rev = "6806cac3" }
#tracing-opentelemetry = { git = "https://github.com/trinity-1686a/tracing.git", rev = "6806cac3" }
#tracing-subscriber = { git = "https://github.com/trinity-1686a/tracing.git", rev = "6806cac3" }

[patch.'https://github.com/quickwit-oss/tantivy/']
tantivy-tokenizer-api = { git = "https://github.com/quickwit-oss/tantivy?rev=3c30066" }
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,11 @@ impl TokenizerConfig {
pub fn text_analyzer(&self) -> anyhow::Result<TextAnalyzer> {
let mut text_analyzer_builder = match &self.tokenizer_type {
TokenizerType::Simple => TextAnalyzer::builder(SimpleTokenizer::default()).dynamic(),
// Note(fmassot): `multilang` is currently an "all-in-one" tokenizer with default
// filters. Static filters allow better performance, which is a requirement
// for the `happy-plazza` project. We may want to revisit that later.
#[cfg(feature = "multilang")]
TokenizerType::Multilang => TextAnalyzer::builder(MultiLangTokenizer::new())
TokenizerType::Multilang => TextAnalyzer::builder(MultiLangTokenizer::default())
.filter(RemoveLongFilter::limit(DEFAULT_REMOVE_TOKEN_LENGTH))
.filter(LowerCaser)
.dynamic(),
Expand Down
Loading

0 comments on commit b789e59

Please sign in to comment.