Skip to content

Commit

Permalink
feat: Add language specific scoring
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Sep 25, 2024
1 parent 1f5b52d commit 8efb238
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 20 deletions.
3 changes: 3 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ node_modules

/src/__tests__
/jest.snapshots.js

/src/indexer/langs/*
/src/worker/langs/*
14 changes: 7 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
node_modules
/plugin
/includer
/runtime
/index.d.ts
/types.d.ts
/types.js

.vscode
.idea
Expand All @@ -16,4 +10,10 @@ node_modules
/dist
/build
/cache
/coverage
/coverage

/src/indexer/langs/*
!/src/indexer/langs/index.d.ts

/src/worker/langs/*
!/src/worker/langs/index.d.ts
5 changes: 4 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ node_modules
/dist
/build
/cache
/coverage
/coverage

/src/indexer/langs/*
/src/worker/langs/*
5 changes: 4 additions & 1 deletion .stylelintignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ node_modules
/dist
/build
/cache
/coverage
/coverage

/src/indexer/langs/*
/src/worker/langs/*
13 changes: 13 additions & 0 deletions esbuild/build.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import esbuild from 'esbuild';
import {TsconfigPathsPlugin} from '@esbuild-plugins/tsconfig-paths';

import {indexer, worker} from './langs.mjs';

const common = {
tsconfig: './tsconfig.json',
bundle: true,
};

await indexer('src/indexer/langs');

esbuild.build({
...common,
target: 'node18',
Expand All @@ -26,3 +30,12 @@ esbuild.build({
outdir: 'lib/worker',
entryPoints: ['src/worker/index.ts'],
});

// Bundle the per-language worker scripts. `worker()` first writes the generated
// TypeScript sources into src/worker/langs and resolves to their paths (one per
// language plus the generated index); each of those paths becomes its own entry
// point, emitted as a browser-targeted CommonJS file under lib/worker/langs.
esbuild.build({
...common,
target: 'ES6',
format: 'cjs',
platform: 'browser',
outdir: 'lib/worker/langs',
entryPoints: await worker('src/worker/langs'),
});
165 changes: 165 additions & 0 deletions esbuild/langs.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import {resolve} from 'node:path';
import {mkdir, writeFile} from 'node:fs/promises';
import {dedent} from 'ts-dedent';

// Language codes for which lunr-languages support modules are generated.
// Each code is interpolated into the module path `lunr-languages/lunr.<code>`.
const LANGS = [
    // Languages that only need the shared stemmer support.
    'ar', 'da', 'de', 'du', 'el', 'es', 'fi',
    'fr', 'he', 'hu', 'hy', 'it', 'ko', 'nl',
    'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'vi',

    // Languages that additionally pull in a segmenter module
    // (tinyseg for ja/jp, wordcut for the rest — see imports() and attach()).
    // 'zh' is intentionally left disabled here; the reason is not recorded in this file.
    // 'zh',
    'ja', 'jp',
    'th', 'hi', 'ta', 'sa', 'kn', 'te',
];

/**
 * Generates the TypeScript sources of the indexer's language plugins.
 *
 * For every code in LANGS a `<lang>.ts` module is written into `outdir`. It exports a
 * function named after the language which registers the lunr-languages support on the
 * lunr instance it receives and returns the resulting builder plugin. Finally an
 * `index.ts` is written that collects all of these functions in a `langs` map.
 *
 * @param {string} outdir Directory that receives the generated files; created if missing.
 * @returns {Promise<void>} Resolves once every file has been written.
 */
export async function indexer(outdir) {
    // writeFile does not create missing parent directories, so make sure the
    // output directory exists before generating anything into it.
    await mkdir(outdir, {recursive: true});

    for (const lang of LANGS) {
        const exports = dedent`
            export function ${lang}(lunr: any) {
                ${attach(lang)}
                return (lunr as unknown as {[lang: string]: Builder.Plugin}).${lang} as Builder.Plugin;
            }
        `;

        await writeFile(
            resolve(outdir, lang + '.ts'),
            dedent`
                ${imports(lang)}
                ${exports}
            `,
            'utf8',
        );
    }

    // Aggregate module: imports every generated language function and exposes them by code.
    await writeFile(
        resolve(outdir, 'index.ts'),
        dedent`
            import type {Builder} from 'lunr';
            ${LANGS.map((lang) => `import {${lang}} from './${lang}.js';`).join('\n')}
            type Langs = Record<string, {(lunr: any): Builder.Plugin}>;
            export const langs: Langs = {${LANGS.join(', ')}};
        `,
        'utf8',
    );
}

/**
 * Generates the TypeScript sources of the worker's language scripts.
 *
 * For every code in LANGS a `<lang>.ts` script is written into `outdir`. When executed it
 * assigns `self.language`, a function that registers the lunr-languages support on the
 * lunr instance it receives and returns the resulting builder plugin. An `index.ts`
 * exporting the list of available language codes is written as well.
 *
 * @param {string} outdir Directory that receives the generated files; created if missing.
 * @returns {Promise<string[]>} Absolute paths of all generated files (language scripts in
 *     LANGS order, followed by the index), suitable as bundler entry points.
 */
export async function worker(outdir) {
    // writeFile does not create missing parent directories, so make sure the
    // output directory exists before generating anything into it.
    await mkdir(outdir, {recursive: true});

    const entries = [];

    for (const lang of LANGS) {
        // NOTE(review): the `/// <reference …>` lines below end up after the import
        // lines produced by imports(lang). TypeScript only honours triple-slash
        // directives at the top of a file, so here they are presumably treated as
        // plain comments — confirm whether they are meant to take effect.
        const exports = dedent`
            /// <reference no-default-lib="true"/>
            /// <reference lib="ES2019" />
            /// <reference lib="webworker" />
            // Default type of \`self\` is \`WorkerGlobalScope & typeof globalThis\`
            // https://github.com/microsoft/TypeScript/issues/14877
            declare const self: ServiceWorkerGlobalScope & {
                language?: (lunr: any) => Builder.Plugin;
            };
            self.language = function(lunr: any) {
                ${attach(lang)}
                return (lunr as unknown as {[lang: string]: Builder.Plugin}).${lang} as Builder.Plugin;
            };
        `;

        const scriptPath = resolve(outdir, lang + '.ts');

        await writeFile(
            scriptPath,
            dedent`
                ${imports(lang)}
                ${exports}
            `,
            'utf8',
        );

        entries.push(scriptPath);
    }

    // Index module: only lists the language codes, it does not import the scripts.
    const indexPath = resolve(outdir, 'index.ts');

    await writeFile(
        indexPath,
        dedent`
            type Langs = string[];
            export const langs: Langs = [${LANGS.map((lang) => `'${lang}'`).join(', ')}];
        `,
        'utf8',
    );

    entries.push(indexPath);

    return entries;
}

/**
 * Builds the import header of a generated language module.
 *
 * Always imports the `Builder` type from lunr, the lunr-languages stemmer support
 * and the language module itself (bound to the local name `lang`). For ja/jp the
 * tinyseg module is imported too, and for th/hi/ta/sa/kn/te the wordcut module.
 * The lunr-languages imports are preceded by `// @ts-ignore` in the generated code.
 *
 * @param {string} lang Language code, interpolated into `lunr-languages/lunr.<lang>`.
 * @returns {string} The import statements as TypeScript source text.
 */
function imports(lang) {
    const usesTinyseg = ['ja', 'jp'].includes(lang);
    const usesWordcut = ['th', 'hi', 'ta', 'sa', 'kn', 'te'].includes(lang);

    // Optional snippets; an empty string leaves a blank line in the header.
    const tinysegImport = usesTinyseg
        ? "\n// @ts-ignore\nimport tinyseg from 'lunr-languages/tinyseg';\n"
        : '';
    const wordcutImport = usesWordcut
        ? "\n// @ts-ignore\nimport wordcut from 'lunr-languages/wordcut';\n"
        : '';

    return dedent`
        import type {Builder} from 'lunr';
        // @ts-ignore
        import stemmer from 'lunr-languages/lunr.stemmer.support';
        // @ts-ignore
        import lang from 'lunr-languages/lunr.${lang}';
        ${tinysegImport}
        ${wordcutImport}
    `;
}

/**
 * Builds the statements that register lunr-languages support for `lang` on a
 * variable named `lunr` inside a generated module.
 *
 * The statements refer to the bindings introduced by imports(): `stemmer`, `lang`
 * and, depending on the language, `tinyseg` or `wordcut`.
 *
 * Order matters: the stemmer support comes first, then the segmenter, and the
 * language module last, so that a language factory which looks up its segmenter
 * while it is being registered already finds it on `lunr`.
 * NOTE(review): lunr-languages documents loading tinyseg before lunr.ja — confirm
 * the same expectation holds for wordcut, and that the wordcut module really
 * exports a function accepting `lunr`.
 *
 * @param {string} lang Language code.
 * @returns {string} Newline-separated statements without trailing newline.
 */
function attach(lang) {
    const needsTinyseg = ['ja', 'jp'].includes(lang);
    const needsWordcut = ['th', 'hi', 'ta', 'sa', 'kn', 'te'].includes(lang);

    return [
        'stemmer(lunr);',
        needsTinyseg ? 'tinyseg(lunr);' : '',
        needsWordcut ? 'wordcut(lunr);' : '',
        'lang(lunr);',
    ]
        .filter(Boolean)
        .join('\n');
}
28 changes: 23 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 11 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"types": "./lib/indexer/index.d.ts",
"scripts": {
"build": "npm run build:clean && npm run build:code",
"build:code": "tsc --emitDeclarationOnly && node esbuild/build.mjs",
"build:code": "node esbuild/build.mjs && tsc --emitDeclarationOnly",
"build:clean": "rm -rf lib",
"prepublishOnly": "npm run build",
"test": "exit 0",
Expand All @@ -21,26 +21,33 @@
"types": "./lib/indexer/index.d.ts",
"default": "./lib/indexer/index.js"
},
"./worker": "./lib/worker/index.js"
"./worker": "./lib/worker/index.js",
"./worker/langs": {
"types": "./lib/worker/langs/index.d.ts",
"default": "./lib/worker/langs/index.js"
},
"./worker/langs/*": "./lib/worker/langs/*.js"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@diplodoc/client": "^3.0.0-beta-1",
"@diplodoc/client": "^3.0.2",
"@diplodoc/components": "^4.13.0",
"@diplodoc/lint": "^1.1.3",
"@diplodoc/tsconfig": "^1.0.2",
"@esbuild-plugins/tsconfig-paths": "^0.1.2",
"@types/lunr": "^2.3.7",
"esbuild": "^0.23.1",
"ts-dedent": "^2.2.0",
"typescript": "^5.6.2"
},
"dependencies": {
"lunr": "^2.3.9",
"lunr-languages": "^1.14.0",
"node-html-parser": "^6.1.13"
},
"peerDependencies": {
"@diplodoc/client": "^3.0.0-beta-1",
"@diplodoc/client": "^3.0.2",
"@diplodoc/components": "^4.11.2"
}
}
7 changes: 6 additions & 1 deletion src/indexer/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import type {DocPageData} from '@diplodoc/components';

import {Builder} from 'lunr';
import lunr, {Builder} from 'lunr';

import {INDEX_FIELDS} from '../constants';

import {langs} from './langs';
import {html2text} from './html';

type DocumentInfo = {
Expand Down Expand Up @@ -73,6 +74,10 @@ export class Indexer {
private init(lang: string) {
const index = new Builder();

if (langs[lang]) {
index.use(langs[lang](lunr));
}

index.ref('url');

for (const [field, boost] of Object.entries(INDEX_FIELDS)) {
Expand Down
5 changes: 5 additions & 0 deletions src/indexer/langs/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import type {Builder} from 'lunr';

/**
 * Registers a language on the given lunr instance and returns the builder
 * plugin for it. Mirrors the signature of the functions emitted into the
 * generated `index.ts` of this directory.
 */
type LangFactory = (lunr: any) => Builder.Plugin;

/** Language factories keyed by language code. */
type Langs = Record<string, LangFactory>;

export const langs: Langs;
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export interface WorkerConfig extends ISearchWorkerConfig {
resources: {
index: string;
registry: string;
language?: string;
};
}

Expand Down
Loading

0 comments on commit 8efb238

Please sign in to comment.