Skip to content

Commit

Permalink
fix: Improve phrase scoring
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Sep 25, 2024
1 parent 1ea9d71 commit 1f5b52d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 39 deletions.
61 changes: 27 additions & 34 deletions src/worker/score.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type ScoreState = {
score: number;
prev: ResultToken | null | undefined;
curr: ResultToken;
phrase: string;
phrase: string[];
position: Position;
};

Expand Down Expand Up @@ -89,7 +89,7 @@ export function phrased(result: Index.Result, terms: string[]) {
prev: null,
curr: token,
position: token.position.slice() as Position,
phrase: token.text,
phrase: [token.text],
};

return match;
Expand All @@ -101,7 +101,7 @@ export function phrased(result: Index.Result, terms: string[]) {

state.score = 0;
state.position = state.curr.position.slice() as Position;
state.phrase = state.curr.text;
state.phrase = [state.curr.text];

if (!tokens.length) {
return end;
Expand All @@ -117,20 +117,12 @@ export function phrased(result: Index.Result, terms: string[]) {

state.prev = state.curr;
state.curr = tokens.shift() as ResultToken;
state.phrase += ' ' + state.curr.text;
state.phrase.push(state.curr.text);

return match;
}

function match() {
if (terms.includes(state.curr.text as string)) {
return scoreToken;
} else {
return scoreWildcard;
}
}

function scoreToken() {
const {prev, curr} = state;

state.score += 2;
Expand All @@ -139,12 +131,8 @@ export function phrased(result: Index.Result, terms: string[]) {
return nextToken;
}

// This is partially buggy if the phrase has more than one similar token
if (distance(prev.position, curr.position) <= MERGE_TOLERANCE) {
if (phrase.includes(state.phrase)) {
state.score += 10;
}

if (isPhrase(phrase, state.phrase, distance(prev.position, curr.position))) {
state.score += 10;
state.position[1] = curr.position[1];

return nextToken;
Expand All @@ -153,22 +141,6 @@ export function phrased(result: Index.Result, terms: string[]) {
return nextScore;
}

function scoreWildcard() {
const {prev, curr} = state;

state.score += 0.5;

if (prev && distance(prev.position, curr.position) <= MERGE_TOLERANCE) {
if (phrase.includes(state.phrase)) {
state.score += 0.5;
}

state.position[1] = state.curr.position[1];
}

return nextScore;
}

function end() {
results = dedupe(results);
return null;
Expand Down Expand Up @@ -234,6 +206,27 @@ function dedupe(tokens: ScoreResult[]) {
return result;
}

/**
 * Tells whether `tokens` occur inside `phrase` one after another, in order.
 *
 * Each token is searched as a plain substring, starting right after the end
 * of the previous token's match, so a repeated token has to match a later
 * occurrence rather than the same one again.
 *
 * @param phrase - Text that is searched.
 * @param tokens - Token texts to look for, in the expected order. Not mutated.
 * @param distance - Gap between the two tokens being compared; any value above
 * `MERGE_TOLERANCE` rejects the match without looking at the text.
 * @returns `true` when every token was found in order (also for an empty
 * `tokens` list), `false` otherwise.
 */
function isPhrase(phrase: string, tokens: string[], distance: number) {
    if (distance > MERGE_TOLERANCE) {
        return false;
    }

    // Offset in `phrase` from which the next token may start matching.
    let cursor = 0;

    for (const token of tokens) {
        const found = phrase.indexOf(token, cursor);

        if (found === -1) {
            return false;
        }

        cursor = found + token.length;
    }

    return true;
}

/**
 * Tells whether two positions overlap, treating both as closed ranges.
 *
 * Assumes each `Position` is a `[start, end]` pair with `start <= end`
 * (TODO confirm against the `Position` declaration).
 *
 * The previous implementation repeated the same clause on both sides of `||`,
 * so it only detected the case where the end of `a` lies inside `b`. It missed
 * `a` starting inside `b` and `a` fully covering `b`. The symmetric check below
 * covers all of these and still returns `true` wherever the old one did.
 *
 * @param a - First range.
 * @param b - Second range.
 * @returns `true` when the ranges share at least one point (touching ends
 * count as an intersection).
 */
function isIntersection(a: Position, b: Position) {
    // Two closed ranges overlap exactly when each one starts before the other ends.
    return a[0] <= b[1] && b[0] <= a[1];
}
Expand Down
22 changes: 17 additions & 5 deletions src/worker/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ interface FixedClause extends Query.Clause {

const makeStrategies = (tolerance: number, index: Index, clauses: FixedClause[], sealed: boolean) =>
[
tolerance > -1 &&
tolerance >= 0 &&
function precise(query: Query) {
query.clauses = clauses.slice();

Expand All @@ -42,7 +42,7 @@ const makeStrategies = (tolerance: number, index: Index, clauses: FixedClause[],
}
}
},
tolerance > 0 &&
tolerance >= 1 &&
function trailingWildcard(query: Query) {
query.clauses = clauses.map((clause) => {
if (clause.presence !== Query.presence.PROHIBITED) {
Expand All @@ -51,7 +51,7 @@ const makeStrategies = (tolerance: number, index: Index, clauses: FixedClause[],
return clause;
});
},
tolerance > 1 &&
tolerance >= 2 &&
function bothWildcard(query: Query) {
query.clauses = clauses.map((clause) => {
if (clause.presence !== Query.presence.PROHIBITED) {
Expand Down Expand Up @@ -102,14 +102,26 @@ export function search(
}

function wildcard(clause: FixedClause, mode: Query.wildcard) {
const requiredLength =
[
// eslint-disable-next-line no-bitwise
mode & Query.wildcard.TRAILING ? 2 : 0,
// eslint-disable-next-line no-bitwise
mode & Query.wildcard.LEADING ? 2 : 0,
].reduce((a, b) => a + b, 0) + 1;

if (clause.term.length < requiredLength) {
return;
}

// eslint-disable-next-line no-bitwise
if (mode & Query.wildcard.TRAILING) {
clause.term = clause.term + '*';
clause.term = clause.term.slice(0, -1) + '*';
}

// eslint-disable-next-line no-bitwise
if (mode & Query.wildcard.LEADING) {
clause.term = '*' + clause.term;
clause.term = '*' + clause.term.slice(1);
}

clause.wildcard = mode;
Expand Down

0 comments on commit 1f5b52d

Please sign in to comment.