Skip to content

Commit

Permalink
Merge pull request #1979 from frappe/search-improve
Browse files Browse the repository at this point in the history
fix(HelpdeskSearch): Add report to evaluate search and improvements with the same
  • Loading branch information
RitvikSardana authored Sep 12, 2024
2 parents 69c1c01 + 3efd29f commit 4509a77
Show file tree
Hide file tree
Showing 12 changed files with 254 additions and 12 deletions.
27 changes: 22 additions & 5 deletions helpdesk/api/article.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,41 @@
import frappe
from textblob import TextBlob
from textblob.exceptions import MissingCorpusError

from helpdesk.search import search as hd_search


def get_nouns(blob: TextBlob):
    """Return the words of ``blob`` whose part-of-speech tag starts with "N".

    :param blob: A ``TextBlob`` (or any object exposing ``pos_tags`` as an
        iterable of ``(word, tag)`` pairs).
    :return: List of the matching words, in order, or an empty list when
        reading the tags raises ``LookupError``.
    """
    try:
        # startswith() skips a word with an empty tag; indexing pos[0] would
        # raise IndexError (a LookupError) and discard every other noun too.
        return [word for word, pos in blob.pos_tags if pos.startswith("N")]
    except LookupError:
        # NOTE(review): presumably raised when the tagger data is not
        # installed — confirm against the corpus download step.
        return []


def get_noun_phrases(blob: TextBlob):
    """Return ``blob.noun_phrases``, falling back to an empty list.

    The fallback is used when reading the attribute raises ``LookupError``
    or ``MissingCorpusError``.
    """
    try:
        phrases = blob.noun_phrases
    except (LookupError, MissingCorpusError):
        phrases = []
    return phrases


def _search_with_terms(terms):
    """Search articles for ``terms``: space-joined first, then "|"-joined."""
    return hd_search(" ".join(terms), only_articles=True) or hd_search(
        "|".join(terms), only_articles=True
    )


@frappe.whitelist()
def search(query: str):
    """Search articles, relaxing the query when the raw text finds nothing.

    The raw ``query`` is tried first. If it yields nothing, the noun phrases
    extracted from it are tried, and then its nouns. Each set of terms is
    tried as a space-joined query and, failing that, as a "|"-joined query.

    :param query: Free-text search string.
    :return: The ``"items"`` list of the first result group, or an empty
        list when no attempt produced results.
    """
    out = hd_search(query, only_articles=True)
    if not out:  # fallback
        # Build the blob once; both extraction helpers read from it.
        blob = TextBlob(query)
        if noun_phrases := get_noun_phrases(blob):
            out = _search_with_terms(noun_phrases)
        if not out and (nouns := get_nouns(blob)):
            out = _search_with_terms(nouns)
    if not out:
        return []
    return out[0].get("items", [])
6 changes: 6 additions & 0 deletions helpdesk/helpdesk/doctype/hd_settings/hd_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ def on_update(self):
room = get_website_room()

frappe.publish_realtime(event, room=room, after_commit=True)

@property
def hd_search(self):
    """Expose the article search function as an attribute of this document.

    Returns the ``search`` callable from ``helpdesk.api.article`` itself,
    not the result of calling it.
    """
    # Imported on access rather than at module load.
    # NOTE(review): presumably to avoid a circular import — confirm.
    import helpdesk.api.article as article_api

    return article_api.search

Check warning on line 62 in helpdesk/helpdesk/doctype/hd_settings/hd_settings.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/doctype/hd_settings/hd_settings.py#L62

Added line #L62 was not covered by tests
Empty file.
8 changes: 8 additions & 0 deletions helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright (c) 2024, Frappe Technologies and contributors
// For license information, please see license.txt

// frappe.ui.form.on("HD Stopword", {
// refresh(frm) {

// },
// });
56 changes: 56 additions & 0 deletions helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"actions": [],
"allow_rename": 1,
"autoname": "field:word",
"creation": "2024-09-12 16:08:27.022111",
"doctype": "DocType",
"engine": "InnoDB",
"field_order": [
"word",
"column_break_jyol",
"enabled"
],
"fields": [
{
"fieldname": "word",
"fieldtype": "Data",
"label": "Word",
"unique": 1
},
{
"default": "1",
"fieldname": "enabled",
"fieldtype": "Check",
"label": "Enabled"
},
{
"fieldname": "column_break_jyol",
"fieldtype": "Column Break"
}
],
"index_web_pages_for_search": 1,
"links": [],
"modified": "2024-09-12 16:10:05.577545",
"modified_by": "Administrator",
"module": "Helpdesk",
"name": "HD Stopword",
"naming_rule": "By fieldname",
"owner": "Administrator",
"permissions": [
{
"create": 1,
"delete": 1,
"email": 1,
"export": 1,
"print": 1,
"read": 1,
"report": 1,
"role": "System Manager",
"share": 1,
"write": 1
}
],
"sort_field": "creation",
"sort_order": "DESC",
"states": []
}
9 changes: 9 additions & 0 deletions helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) 2024, Frappe Technologies and contributors
# For license information, please see license.txt

# import frappe
from frappe.model.document import Document

Check warning on line 5 in helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.py#L5

Added line #L5 was not covered by tests


class HDStopword(Document):
    """Document class for the "HD Stopword" DocType; adds no custom behaviour."""

Check warning on line 9 in helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/doctype/hd_stopword/hd_stopword.py#L8-L9

Added lines #L8 - L9 were not covered by tests
9 changes: 9 additions & 0 deletions helpdesk/helpdesk/doctype/hd_stopword/test_hd_stopword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) 2024, Frappe Technologies and Contributors
# See license.txt

# import frappe
from frappe.tests.utils import FrappeTestCase


class TestHDStopword(FrappeTestCase):
    """Placeholder test case for HD Stopword; it defines no tests yet."""
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) 2024, Frappe Technologies and contributors
// For license information, please see license.txt

// Client-side settings for the "Ticket-Search Analysis" query report.
// The report currently defines no filters; the commented entry below is a
// template showing the shape of a filter definition.
frappe.query_reports["Ticket-Search Analysis"] = {
  filters: [
    // {
    // "fieldname": "my_filter",
    // "label": __("My Filter"),
    // "fieldtype": "Data",
    // "reqd": 1,
    // },
  ],
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"add_total_row": 0,
"columns": [],
"creation": "2024-09-11 22:31:05.837918",
"disabled": 0,
"docstatus": 0,
"doctype": "Report",
"filters": [],
"idx": 0,
"is_standard": "Yes",
"letterhead": null,
"modified": "2024-09-11 22:31:05.837918",
"modified_by": "Administrator",
"module": "Helpdesk",
"name": "Ticket-Search Analysis",
"owner": "Administrator",
"prepared_report": 0,
"ref_doctype": "HD Ticket",
"report_name": "Ticket-Search Analysis",
"report_type": "Script Report",
"roles": [
{
"role": "System Manager"
},
{
"role": "Agent"
},
{
"role": "Support Team"
},
{
"role": "Supported Site User"
},
{
"role": "External Agent"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) 2024, Frappe Technologies and contributors
# For license information, please see license.txt

import frappe
from frappe import _

Check warning on line 5 in helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py#L4-L5

Added lines #L4 - L5 were not covered by tests

from helpdesk.api.article import search

Check warning on line 7 in helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py#L7

Added line #L7 was not covered by tests


def execute(filters: dict | None = None):
    """Entry point of the Ticket-Search Analysis report.

    The framework calls this every time the report is refreshed or a filter
    is updated.

    :param filters: Filter values passed by the framework; this report does
        not read them.
    :return: A ``(columns, data)`` pair.
    """
    return get_columns(), get_data()

Check warning on line 20 in helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py#L20

Added line #L20 was not covered by tests


def get_columns() -> list[dict]:
    """Describe the report's columns: subject, top result and search score.

    Each entry is a field definition, just like a DocType field definition.
    """
    subject_column = dict(label=_("Subject"), fieldname="subject", fieldtype="Data")
    top_result_column = dict(
        label=_("Top Result"), fieldname="top_res", fieldtype="Text"
    )
    score_column = dict(label=_("Search Score"), fieldname="score", fieldtype="Float")
    return [subject_column, top_result_column, score_column]


def get_top_res(search_term: str) -> tuple[str, float]:
    """Summarise the article search results for ``search_term``.

    :param search_term: Text to search for; the report passes a ticket
        subject.
    :return: ``(headings, score)``. ``headings`` has one newline-terminated
        line per result, holding the result's ``headings`` value or, when
        that is empty, its ``subject``. ``score`` is the sum of the scores
        of all results. With no results this is ``("", 0)``.
    """
    lines = []
    total_score = 0
    for item in search(search_term):
        # Fall back to an empty line rather than failing when a result has
        # neither a heading nor a subject.
        lines.append(item.get("headings") or item.get("subject") or "")
        total_score += item.get("score") or 0
    # Join once instead of growing a string inside the loop.
    return "".join(f"{line}\n" for line in lines), total_score

Check warning on line 56 in helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/helpdesk/report/ticket_search_analysis/ticket_search_analysis.py#L49-L56

Added lines #L49 - L56 were not covered by tests


def get_data(agent_group_pattern: str = "%FC%", limit: int = 100) -> list[list]:
    """Return the report rows, one per matching ticket.

    :param agent_group_pattern: ``like`` pattern matched against each
        ticket's ``agent_group``. Defaults to ``"%FC%"``, the value that was
        previously hard-coded.
    :param limit: Maximum number of tickets to fetch. Defaults to 100, the
        value that was previously hard-coded.
    :return: A list of rows, each ``[subject, top_res, score]`` where
        ``top_res`` and ``score`` come from ``get_top_res(subject)``.
    """
    tickets = frappe.get_all(
        "HD Ticket",
        {"agent_group": ["like", agent_group_pattern]},
        ["name", "subject"],
        limit=limit,
    )
    rows = []
    for ticket in tickets:
        # Build each row directly instead of writing back into the fetched
        # ticket record.
        top_res, score = get_top_res(ticket["subject"])
        rows.append([ticket["subject"], top_res, score])
    return rows
29 changes: 22 additions & 7 deletions helpdesk/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import frappe
from bs4 import BeautifulSoup, PageElement
from frappe.utils import cstr, strip_html_tags, update_progress_bar
from frappe.utils.caching import redis_cache
from frappe.utils.synchronization import filelock
from redis.commands.search.field import TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition
Expand Down Expand Up @@ -62,11 +63,20 @@
"you",
"me",
"do",
"has",
"been",
"urgent",
"want",
]


@redis_cache(3600 * 24)
def get_stopwords():
    """Return the built-in ``STOPWORDS`` plus every enabled "HD Stopword" name.

    The result is wrapped by ``redis_cache(3600 * 24)``.
    NOTE(review): presumably a TTL in seconds, i.e. one day — confirm.
    """
    custom_words = frappe.get_all("HD Stopword", {"enabled": True}, pluck="name")
    return STOPWORDS + custom_words

Check warning on line 75 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L75

Added line #L75 was not covered by tests


class Search:
unsafe_chars = re.compile(r"[\[\]{}<>+]")
unsafe_chars = re.compile(r"[\[\]{}<>+!-]")

def __init__(self, index_name, prefix, schema) -> None:
self.redis = frappe.cache()
Expand Down Expand Up @@ -95,7 +105,7 @@ def create_index(self):
self.redis.ft(self.index_name).create_index(
schema,
definition=index_def,
stopwords=STOPWORDS,
stopwords=get_stopwords(),
)
self._index_exists = True

Expand Down Expand Up @@ -128,6 +138,7 @@ def search(

query.summarize(fields=["description"])
query.scorer("DISMAX")
query.with_scores()

Check warning on line 141 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L141

Added line #L141 was not covered by tests

try:
result = self.redis.ft(self.index_name).search(query)
Expand All @@ -147,8 +158,7 @@ def search(
def clean_query(self, query):
    """Normalise a raw search string.

    :param query: The text typed by the user.
    :return: ``query`` with surrounding whitespace removed, every ``-*``
        replaced by ``*``, each character matched by ``self.unsafe_chars``
        replaced by a space, then stripped again and lower-cased.
    """
    query = query.strip().replace("-*", "*")
    query = self.unsafe_chars.sub(" ", query)
    # str.strip() returns a new string, so its result has to be returned;
    # calling it as a bare statement leaves the padding introduced by the
    # substitution above in place.
    return query.strip().lower()

Check warning on line 161 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L161

Added line #L161 was not covered by tests

def spellcheck(self, query, **kwargs):
return self.redis.ft(self.index_name).spellcheck(query, **kwargs)
Expand Down Expand Up @@ -292,7 +302,10 @@ def get_count(self, doctype):

def get_records(self, doctype):
records = []
for d in frappe.db.get_all(doctype, fields=self.DOCTYPE_FIELDS[doctype]):
filters = {"published": True} if doctype == "HD Article" else {}
for d in frappe.db.get_all(

Check warning on line 306 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L305-L306

Added lines #L305 - L306 were not covered by tests
doctype, filters=filters, fields=self.DOCTYPE_FIELDS[doctype]
):
d.doctype = doctype
if doctype == "HD Article":
for heading, section in self.get_sections(d.content):
Expand All @@ -311,9 +324,9 @@ def get_records(self, doctype):
def search(query, only_articles=False):
search = HelpdeskSearch()
query = search.clean_query(query)
query_parts = query.split(" ")
query_parts = query.split()

Check warning on line 327 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L327

Added line #L327 was not covered by tests
query = " ".join(
[f"%{q}%" for q in query_parts if q not in STOPWORDS]
[f"{q}*" for q in query_parts if q not in get_stopwords()]
) # for stopwords to be ignored
result = search.search(query, start=0, highlight=True)
groups = {}
Expand Down Expand Up @@ -360,6 +373,8 @@ def download_corpus():
try:
data.find("taggers/averaged_perceptron_tagger_eng.zip")
data.find("tokenizers/punkt_tab.zip")
data.find("corpora/brown.zip")

Check warning on line 376 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L376

Added line #L376 was not covered by tests
except LookupError:
download("averaged_perceptron_tagger_eng")
download("punkt_tab")
download("brown")

Check warning on line 380 in helpdesk/search.py

View check run for this annotation

Codecov / codecov/patch

helpdesk/search.py#L380

Added line #L380 was not covered by tests

0 comments on commit 4509a77

Please sign in to comment.