Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quick search improvements #2772

Open
wants to merge 36 commits into
base: Develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5233f78
comments
Apr 25, 2023
4b2e7b8
Changed the quick search behavior so that it allows title and author …
Apr 26, 2023
d13d465
proof of concept fuzzy matching
Apr 26, 2023
ada0cc4
fuzzy matching for all but cc
Apr 29, 2023
8e8c9a1
fuzzy matching for all categories but cc
Apr 29, 2023
4be089d
Merge remote-tracking branch 'origin/master'
Apr 29, 2023
61f1e20
fuzzy matching for all categories but cc
Apr 29, 2023
7a43e07
Merge branch 'master' of https://github.com/quarz12/calibre-web
Apr 29, 2023
f497cc0
removed todo
Apr 29, 2023
2e3c93b
removed debugging help
Apr 29, 2023
6b93829
Merge remote-tracking branch 'origin/master'
Apr 29, 2023
af40fee
I accidentally used pycharms auto-add-to-requirements feature which r…
May 3, 2023
9fc0d54
idea for weighted sorting
May 8, 2023
4ba3b4e
typing for query
quarz12 May 9, 2023
932c796
fixed an issue where the lowering and stripping of the search term wa…
May 10, 2023
97c94f2
moved sorting back to original place
May 10, 2023
45d8d63
sort using only authorsort and title
quarz12 May 11, 2023
c115fe9
use partial token set ratio instead
quarz12 May 11, 2023
086527f
test at home
quarz12 May 11, 2023
ad5313e
new idea
May 15, 2023
e45619f
progress building string of book
quarz12 May 17, 2023
a936a33
updated Books string repr
May 27, 2023
cb5e66f
add partial token set ratio to db
May 28, 2023
025a888
rolled back string repr of book, moved that part to a new method
May 28, 2023
caf6079
moved author filter to the rest of the filters, ignore words smaller …
May 28, 2023
56d4a3d
Merge branch 'master' into sorting
quarz12 May 28, 2023
c018ef7
Merge pull request #2 from quarz12/sorting
quarz12 May 28, 2023
b96d02c
now return empty list if all words of query are < 3 letters, only com…
May 30, 2023
5e0430e
message when query returns 0 results
May 31, 2023
896e8fd
minor cleanup, removed unused code
Jun 3, 2023
4b36261
removed unwanted refactoring
quarz12 Apr 13, 2024
2ac1566
Merge pull request #3 from quarz12/sorting
quarz12 Apr 13, 2024
518c56e
removed some more unwanted refactoring
quarz12 Apr 13, 2024
a295651
Merge branch 'master' of https://github.com/quarz12/calibre-web
quarz12 Apr 13, 2024
cca178f
Merge branch 'master' of https://github.com/quarz12/calibre-web
quarz12 Apr 13, 2024
b37554f
Merge branch 'master' of https://github.com/quarz12/calibre-web
quarz12 Apr 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 47 additions & 11 deletions cps/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@
from .pagination import Pagination

from weakref import WeakSet
from thefuzz.fuzz import partial_token_sort_ratio, ratio

# Minimum fuzzy-match score on a 0-100 scale: 100 means an exact match, 75 tolerates exactly one wrong character in a four-letter word
FUZZY_SEARCH_ACCURACY = 75

log = logger.create()

Expand Down Expand Up @@ -381,6 +384,18 @@ def __repr__(self):
self.timestamp, self.pubdate, self.series_index,
self.last_modified, self.path, self.has_cover)

def __str__(self):
    """Return the book's descriptive text as one space-separated string.

    The parts appear in a fixed order: title, tag names, series names,
    author names and publisher names. Names inside a group are joined by
    single spaces; a group without entries contributes an empty string, so
    the separators around it are still emitted.
    """
    title = self.title
    # One joined string per related collection, in output order.
    groups = [
        " ".join(entry.name for entry in related)
        for related in (self.tags, self.series, self.authors, self.publishers)
    ]
    return "{0} {1} {2} {3} {4}".format(title, *groups)

@property
def atom_timestamp(self):
return self.timestamp.strftime('%Y-%m-%dT%H:%M:%S+00:00') or ''
Expand Down Expand Up @@ -883,12 +898,18 @@ def check_exists_book(self, authr, title):
.filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()

def search_query(self, term, config, *join):
term.strip().lower()
term = term.strip().lower()
self.session.connection().connection.connection.create_function("lower", 1, lcase)
q = list()
author_terms = re.split("[, ]+", term)
for author_term in author_terms:
q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%")))
self.session.connection().connection.connection.create_function("max_ratio", 2, max_ratio)
# splits search term into single words
words = re.split("[,\s]+", term)
# put the longest words first to make queries more efficient
words.sort(key=len, reverse=True)
words=list(filter(lambda w:len(w)>3,words))
# no word in search term is longer than 3 letters -> return empty query #TODO give some kind of error message
if len(words)==0:
return self.session.query(Books).filter(False)

query = self.generate_linked_query(config.config_read_column, Books)
if len(join) == 6:
query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5])
Expand All @@ -899,19 +920,28 @@ def search_query(self, term, config, *join):
elif len(join) == 1:
query = query.outerjoin(join[0])

filter_expression = []
cc = self.get_cc_columns(config, filter_config_custom_read=True)
filter_expression = [Books.tags.any(func.lower(Tags.name).ilike("%" + term + "%")),
Books.series.any(func.lower(Series.name).ilike("%" + term + "%")),
Books.authors.any(and_(*q)),
Books.publishers.any(func.lower(Publishers.name).ilike("%" + term + "%")),
func.lower(Books.title).ilike("%" + term + "%")]
for c in cc:
if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
filter_expression.append(
getattr(Books,
'custom_column_' + str(c.id)).any(
func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
return query.filter(self.common_filters(True)).filter(or_(*filter_expression))
# filter out multiple languages and archived books,
results = query.filter(self.common_filters(True))
filters=[filter_expression] if filter_expression else []
# search tags, series and titles, also add author queries
for word in words:
filters.append(or_(*[
Books.tags.any(func.max_ratio(func.lower(Tags.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.series.any(func.max_ratio(func.lower(Series.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.authors.any(func.max_ratio(func.lower(Authors.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.publishers.any(func.max_ratio(func.lower(Publishers.name), word) >= FUZZY_SEARCH_ACCURACY),
func.max_ratio(func.lower(Books.title), word) >= FUZZY_SEARCH_ACCURACY
]))
results = results.filter(and_(*filters))
return results

def get_cc_columns(self, config, filter_config_custom_read=False):
tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all()
Expand All @@ -934,6 +964,7 @@ def get_search_results(self, term, config, offset=None, order=None, limit=None,
order = order[0] if order else [Books.sort]
pagination = None
result = self.search_query(term, config, *join).order_by(*order).all()
result = sorted(result,key=lambda query:partial_token_sort_ratio(str(query[0]),term),reverse=True)
result_count = len(result)
if offset != None and limit != None:
offset = int(offset)
Expand Down Expand Up @@ -1052,6 +1083,11 @@ def lcase(s):
return s.lower()


def max_ratio(string: str, term):
    """Return the best fuzzy-match score between ``term`` and any word of ``string``.

    ``string`` is split on whitespace and surrounding colons are stripped
    from each word. Every remaining word longer than three characters is
    compared to ``term`` with ``ratio``; the highest score is returned.

    :param string: text to scan; ``None`` or an empty value yields 0
    :param term: single search word to compare against
    :return: highest ``ratio`` score found, or 0 when no word qualifies
    """
    # Guard against None and empty text: ``None.split()`` raises
    # AttributeError and ``max()`` over nothing raises ValueError.
    # NOTE(review): None presumably arrives for SQL NULL when this runs as a
    # SQLite user function - confirm against the caller.
    if not string:
        return 0
    stripped_words = (word.strip(":") for word in string.split())
    # Words of three characters or fewer are skipped as too generic to match on.
    return max(
        (ratio(word, term) for word in stripped_words if len(word) > 3),
        default=0,
    )

class Category:
name = None
id = None
Expand Down
1 change: 1 addition & 0 deletions cps/templates/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{% if entries|length < 1 %}
<h2>{{_('No Results Found')}}</h2>
<p>{{_('Search Term:')}} {{adv_searchterm}}</p>
<p>{{_('Words smaller than 3 letters are not considered')}}</p>
{% else %}
<h2>{{result_count}} {{_('Results for:')}} {{adv_searchterm}}</h2>
{% if current_user.is_authenticated %}
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ flask-wtf>=0.14.2,<1.2.0
chardet>=3.0.0,<4.1.0
advocate>=1.0.0,<1.1.0
Flask-Limiter>=2.3.0,<3.4.0

thefuzz~=0.19.0
Levenshtein~=0.21.0