Skip to content

Commit

Permalink
fix: Fix search clauses generation
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Sep 30, 2024
1 parent 9449e06 commit d0abda2
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 37 deletions.
10 changes: 10 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Jest configuration for the test suite.
// NOTE(review): the type annotation below references ts-jest's option type while the
// transform is handled by `esbuild-jest` — confirm this annotation is still intended.
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
module.exports = {
// Snapshot serializer options: strings are escaped and the basic prototype name
// (e.g. `Array [`) is printed. The committed snapshot file uses this format.
snapshotFormat: {
escapeString: true,
printBasicPrototype: true,
},
// Files whose path ends in `.ts` or `.tsx` are transformed by `esbuild-jest`,
// which is pointed at the project's tsconfig.
transform: {
'^.+\\.tsx?$': ['esbuild-jest', {tsconfig: './tsconfig.json'}],
},
};
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"build:code": "node esbuild/build.mjs && tsc --emitDeclarationOnly",
"build:clean": "rm -rf lib",
"prepublishOnly": "npm run build",
"test": "exit 0",
"test": "jest",
"typecheck": "tsc --noEmit",
"lint": "lint update && lint",
"lint:fix": "lint update && lint fix",
Expand All @@ -36,8 +36,10 @@
"@diplodoc/lint": "^1.1.3",
"@diplodoc/tsconfig": "^1.0.2",
"@esbuild-plugins/tsconfig-paths": "^0.1.2",
"@types/jest": "^29.5.13",
"@types/lunr": "^2.3.7",
"esbuild": "^0.23.1",
"jest": "^29.7.0",
"ts-dedent": "^2.2.0",
"typescript": "^5.6.2"
},
Expand Down
25 changes: 21 additions & 4 deletions src/indexer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ type DocumentInfo = {
keywords: string[];
};

/**
 * Output formats accepted by `Indexer.release`.
 */
export enum ReleaseFormat {
// Index and registry are serialized into `self.index=...` / `self.registry=...` strings.
JSONP = 'jsonp',
// Built index and registry are returned as-is, without serialization.
RAW = 'raw',
}

export class Indexer {
private indices: Record<string, Builder> = {};

Expand All @@ -34,7 +39,11 @@ export class Indexer {
*
* @returns {void}
*/
add(lang: string, url: string, data: DocPageData) {
add(
lang: string,
url: string,
data: Pick<DocPageData, 'title' | 'html' | 'meta' | 'leading' | 'toc'>,
) {
if (!this.indices[lang]) {
this.init(lang);
}
Expand All @@ -61,12 +70,20 @@ export class Indexer {
* Dumps index and registry for target language.
*
* @param lang - index language
* @param format - output format
*
* @returns {{index: Index, registry: Registry}}
*/
release(lang: string) {
const index = 'self.index=' + JSON.stringify(this.indices[lang].build());
const registry = 'self.registry=' + JSON.stringify(this.docs[lang]);
release(lang: string, format = ReleaseFormat.JSONP) {
    // Build the index first, then pick up the document registry for the same language.
    const builtIndex = this.indices[lang].build();
    const documents = this.docs[lang];

    // Any format other than JSONP hands back the raw objects untouched.
    if (format !== ReleaseFormat.JSONP) {
        return {index: builtIndex, registry: documents};
    }

    // JSONP: serialize both parts as assignments to `self.index` / `self.registry`.
    return {
        index: `self.index=${JSON.stringify(builtIndex)}`,
        registry: `self.registry=${JSON.stringify(documents)}`,
    };
}
Expand Down
2 changes: 1 addition & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ enum Confidence {

export interface WorkerConfig extends ISearchWorkerConfig {
tolerance: number;
confidence: Confidence;
confidence: `${Confidence}` | Confidence;
resources: {
index: string;
registry: string;
Expand Down
4 changes: 2 additions & 2 deletions src/worker/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const SHORT_HEAD = 20;
type Trimmer = (text: string, score: Score) => [string, Position[]];

export function format(
{base, mark}: WorkerConfig,
{base, mark}: Pick<WorkerConfig, 'base' | 'mark'>,
results: SearchResult[],
registry: Registry,
trim: Trimmer,
Expand All @@ -18,7 +18,7 @@ export function format(
const doc = registry[entry.ref];
const item = {
type: 'page',
link: `${base}/${entry.ref}`,
link: `${base.replace(/\/?$/, '')}/${entry.ref.replace(/&\/?/, '')}`,
title: doc.title,
description: doc.content.slice(0, MAX_LENGTH),
} as SearchSuggestPageItem;
Expand Down
62 changes: 34 additions & 28 deletions src/worker/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ import {INDEX_FIELDS} from '../constants';

import {phrased, sparsed} from './score';

const withIndex = (index: Index) => (builder: Index.QueryBuilder | false) =>
function withIndex() {
if (!builder) {
return false;
}
/**
 * Type guard for entries of the strategy list.
 *
 * The list is built from `condition && function ...` expressions, so disabled
 * strategies show up as non-function values; only callable entries pass.
 *
 * @param candidate - any entry of the strategy list
 * @returns `true` when the entry is a function
 */
function isStrategy(candidate: unknown): candidate is Index.QueryBuilder {
    return typeof candidate === 'function';
}

/**
 * Binds an index to query builders.
 *
 * Given an index, produces a wrapper that turns a query builder into a
 * zero-argument function; invoking that function runs `index.query(builder)`
 * and returns its result. Nothing is queried until the function is called.
 */
const withIndex = (index: Index) => {
    return (builder: Index.QueryBuilder) => {
        // The returned function keeps the name `withIndex`.
        return function withIndex() {
            return index.query(builder);
        };
    };
};

Expand All @@ -32,49 +31,48 @@ const makeStrategies = (tolerance: number, index: Index, clauses: FixedClause[],
[
tolerance >= 0 &&
function precise(query: Query) {
query.clauses = clauses.slice();
query.clauses = copy(clauses);
},
tolerance >= 0 &&
function precise(query: Query) {
query.clauses = clauses.slice();

if (!sealed) {
for (let i = query.clauses.length - 1; i >= 0; i--) {
const clause = query.clauses[i] as FixedClause;
if (clause.presence !== Query.presence.PROHIBITED) {
wildcard(clause, Query.wildcard.TRAILING);
break;
}
!sealed &&
function preciseUnsealed(query: Query) {
query.clauses = copy(clauses);

for (let i = query.clauses.length - 1; i >= 0; i--) {
const clause = query.clauses[i] as FixedClause;
if (clause.presence !== Query.presence.PROHIBITED) {
query.clauses[i] = wildcard(clause, Query.wildcard.TRAILING);
break;
}
}
},
tolerance >= 1 &&
function trailingWildcard(query: Query) {
query.clauses = clauses.map((clause) => {
query.clauses = copy(clauses).map((clause) => {
if (clause.presence !== Query.presence.PROHIBITED) {
wildcard(clause, Query.wildcard.TRAILING);
return wildcard(clause, Query.wildcard.TRAILING);
}
return clause;
});
},
tolerance >= 2 &&
function bothWildcard(query: Query) {
query.clauses = clauses.map((clause) => {
query.clauses = copy(clauses).map((clause) => {
if (clause.presence !== Query.presence.PROHIBITED) {
// eslint-disable-next-line no-bitwise
wildcard(clause, Query.wildcard.LEADING | Query.wildcard.TRAILING);
return wildcard(clause, Query.wildcard.LEADING | Query.wildcard.TRAILING);
}
return clause;
});
},
]
.filter(Boolean)
.filter(isStrategy)
.map(withIndex(index));

export type SearchResult = Index.Result & {scores: Record<string, Score>};

export function search(
{tolerance, confidence}: WorkerConfig,
{tolerance, confidence}: Pick<WorkerConfig, 'confidence' | 'tolerance'>,
index: Index,
query: string,
count: number,
Expand Down Expand Up @@ -108,6 +106,8 @@ export function search(
}

function wildcard(clause: FixedClause, mode: Query.wildcard) {
const result = {...clause};

const requiredLength =
[
// eslint-disable-next-line no-bitwise
Expand All @@ -116,22 +116,24 @@ function wildcard(clause: FixedClause, mode: Query.wildcard) {
mode & Query.wildcard.LEADING ? 2 : 0,
].reduce((a, b) => a + b, 0) + 1;

if (clause.term.length < requiredLength) {
return;
if (result.term.length < requiredLength) {
return result;
}

// eslint-disable-next-line no-bitwise
if (mode & Query.wildcard.TRAILING) {
clause.term = clause.term + '*';
result.term = result.term + '*';
}

// eslint-disable-next-line no-bitwise
if (mode & Query.wildcard.LEADING) {
clause.term = '*' + clause.term;
result.term = '*' + result.term;
}

clause.wildcard = mode;
clause.usePipeline = false;
result.wildcard = mode;
result.usePipeline = false;

return result;
}

function byMaxScore(a: SearchResult, b: SearchResult) {
Expand All @@ -152,3 +154,7 @@ function getMaxScore(result: SearchResult) {

return score;
}

/**
 * Creates an independent copy of a clause list.
 *
 * Returns a new array holding a shallow clone of every clause, so callers can
 * reassign clause fields (e.g. `term`, `wildcard`) without touching the source
 * clauses. The clone is one level deep: nested values are still shared.
 *
 * @param clauses - clauses to duplicate
 * @returns a new array of cloned clauses
 */
function copy(clauses: FixedClause[]) {
    // `map` already allocates a fresh array, so a preliminary `slice()` is redundant.
    return clauses.map((clause) => ({...clause}));
}
57 changes: 57 additions & 0 deletions test/__snapshots__/index.spec.ts.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`suggest should match code 1`] = `
Array [
"
<a href=\\"./7\\">
<div></div>
<div><span class=\\"mark\\">crm.stagehistory.list</span></div>
</a>
",
]
`;
exports[`suggest should match html content 1`] = `
Array [
"
<a href=\\"./1\\">
<div></div>
<div><span class=\\"mark\\">Lorem ipsum</span> dolor sit amet, consectetur adipiscing elit. Integer sit amet enim velit.</div>
</a>
",
]
`;
exports[`suggest should match title content 1`] = `
Array [
"
<a href=\\"./3\\">
<div><span class=\\"mark\\">Lorem ipsum</span> 1</div>
<div>Integer sit amet enim velit. Nam facilisis eget magna non blandit.</div>
</a>
",
"
<a href=\\"./4\\">
<div><span class=\\"mark\\">Lorem ipsum</span> 2</div>
<div>Nam facilisis eget magna non blandit. Sed semper, dui ut suscipit semper, nibh justo tempor purus, quis placerat enim dolor vitae neque.</div>
</a>
",
]
`;
exports[`suggest should score longest phrase 1`] = `
Array [
"
<a href=\\"./6\\">
<div></div>
<div>...urus, quis placerat <span class=\\"mark\\">enim dolor vitae</span> neque. Vivamus dignissim nunc et tortor vulputate maximus.</div>
</a>
",
"
<a href=\\"./5\\">
<div></div>
<div>Lorem ipsum <span class=\\"mark\\">dolor</span> sit amet, consectetur adipiscing elit. Integer sit amet enim velit. Nam facilisis eget magna non blandit.</div>
</a>
",
]
`;
91 changes: 91 additions & 0 deletions test/index.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import type {Index} from 'lunr';
import type {Registry, WorkerConfig} from '../src/types';
import type {SearchSuggestPageItem} from '@diplodoc/components';

import {Indexer, ReleaseFormat} from '../src/indexer';
import {search} from '../src/worker/search';
import {format, short} from '../src/worker/format';

// Pool of fixture sentences. Tests build document bodies by joining slices of this array.
// Note: the first and the eighth entries are the same sentence.
const Lorem = [
'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
'Integer sit amet enim velit.',
'Nam facilisis eget magna non blandit.',
'Sed semper, dui ut suscipit semper, nibh justo tempor purus, quis placerat enim dolor vitae neque.',
'Vivamus dignissim nunc et tortor vulputate maximus.',
'Fusce lobortis pretium lectus, non pretium mi rhoncus quis.',
'Curabitur blandit imperdiet metus id luctus.',
'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
'Aenean lobortis ligula a mauris posuere, luctus pretium mauris ultrices.',
];

// Dot-separated identifier used as a document body; the "should match code" test
// searches for its middle segment.
const Code = 'crm.stagehistory.list';

/**
 * Renders one suggest entry as a small HTML fragment for snapshotting:
 * an anchor pointing at `link` that wraps the title and the description.
 * Values are interpolated as-is, without escaping.
 */
function item(entry: SearchSuggestPageItem) {
    const {link, title, description} = entry;

    return `
<a href="${link}">
<div>${title}</div>
<div>${description}</div>
</a>
`;
}

// End-to-end suite: documents are added to an Indexer, the raw index/registry are
// released, a query is run through `search`, the results go through `format`, and the
// rendered HTML is compared against stored snapshots.
describe('suggest', () => {
const lang = 'ru';
let indexer: Indexer;
// NOTE(review): `uid` is shared by all tests and is not reset in `beforeEach` (only the
// indexer is recreated), so document ids keep growing across tests. The ids in the stored
// snapshots (e.g. `./3`, `./7`) therefore depend on the tests running in this order.
let uid = 1;

// Runs `query` against everything indexed so far and renders each result with `item`.
// `release` is asked for the RAW format; the `as Index` / `as Registry` casts narrow its
// return value for `search` and `format`.
function suggest(query: string, config: Pick<WorkerConfig, 'confidence' | 'tolerance'>) {
const {index, registry} = indexer.release(lang, ReleaseFormat.RAW);

// 10 is passed as `count`; the meaning of the trailing `false` is not visible from this
// file — see the `search` signature.
const results = search(config, index as Index, query, 10, false);

return format({base: './', mark: 'mark'}, results, registry as Registry, short).map(item);
}

// Indexes one document under the next numeric id (used as its url), with an optional
// title and empty meta/toc.
function add(html: string, title = '') {
indexer.add(lang, String(uid++), {
html,
title,
leading: false,
meta: {},
toc: {items: [], href: ''},
});
}

// Every test starts from an empty indexer.
beforeEach(() => {
indexer = new Indexer();
});

// Two documents are indexed; the query phrase occurs only in the body of the first one.
it('should match html content', () => {
add(Lorem.slice(0, 2).join(' '));
add(Lorem.slice(1, 3).join(' '));

const config = {confidence: 'phrased', tolerance: 2} as const;

expect(suggest('Lorem ipsum', config)).toMatchSnapshot();
});

// The query phrase occurs only in the titles, not in the bodies.
it('should match title content', () => {
add(Lorem.slice(1, 3).join(' '), 'Lorem ipsum 1');
add(Lorem.slice(2, 4).join(' '), 'Lorem ipsum 2');

const config = {confidence: 'phrased', tolerance: 2} as const;

expect(suggest('Lorem ipsum', config)).toMatchSnapshot();
});

// Only the second document contains the whole phrase; in the stored snapshot it is
// listed before the first one.
it('should score longest phrase', () => {
add(Lorem.slice(0, 3).join(' '));
add(Lorem.slice(1, 5).join(' '));

const config = {confidence: 'phrased', tolerance: 2} as const;

expect(suggest('enim dolor vitae', config)).toMatchSnapshot();
});

// The query is the middle segment of the dot-separated `Code` string.
it('should match code', () => {
add(Code);

const config = {confidence: 'phrased', tolerance: 2} as const;

expect(suggest('stagehistory', config)).toMatchSnapshot();
});
});
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
"baseUrl": ".",
"outDir": "lib"
},
"include": ["src"]
"include": ["src", "test"]
}

0 comments on commit d0abda2

Please sign in to comment.