You can install @nlpjs/similarity:
npm install @nlpjs/similarity
The `leven` function calculates the Levenshtein distance between two texts:
const { leven } = require('@nlpjs/similarity');
// Text pairs to compare; the trailing comment on each entry is the
// Levenshtein distance that should be printed for it.
const samplePairs = [
  ['potatoe', 'potatoe'], // expected: 0
  ['distance', 'eistancd'], // expected: 2
  ['mikailovitch', 'Mikhaïlovitch'], // expected: 3
];
for (const [left, right] of samplePairs) {
  console.log(leven(left, right));
}
The `similarity` function also calculates the Levenshtein distance between two texts, but with an option to normalize both texts before the calculation.
const { similarity } = require('@nlpjs/similarity');
/**
 * Logs the result of `similarity` for a pair of words twice: first without
 * normalization, then with the normalization flag set to `true`.
 *
 * @param {string} word1 - First word to compare.
 * @param {string} word2 - Second word to compare.
 */
function showDistances(word1, word2) {
  const header = `"${word1}" vs "${word2}" :`;
  console.log(header);

  // Raw comparison: the texts are compared exactly as given.
  const plainScore = similarity(word1, word2);
  console.log(` similarity (non normalized): ${plainScore}`);

  // Third argument `true` asks `similarity` to normalize both texts first.
  const normalizedScore = similarity(word1, word2, true);
  console.log(` similarity (normalized): ${normalizedScore}`);
}
// Word pairs to report on: identical words, a case-only difference,
// swapped first/last letters, and a case/accent/extra-letter difference.
const wordPairs = [
  ['potatoe', 'potatoe'],
  ['potatoe', 'Potatoe'],
  ['distance', 'eistancd'],
  ['mikailovitch', 'Mikhaïlovitch'],
];
wordPairs.forEach(([first, second]) => showDistances(first, second));
`SpellCheck` can perform spell checking based on a dictionary of words with their frequencies. It searches for the most similar word based on Levenshtein distance. When several words have the same Levenshtein distance, the word with the highest frequency is chosen.
// Import from the published package so the example works after
// `npm install @nlpjs/similarity` (a repo-relative path only resolves inside
// the library's own source tree).
const { SpellCheck } = require('@nlpjs/similarity');

// Dictionary of known words mapped to their frequency.
const spellCheck = new SpellCheck({
  features: {
    wording: 1,
    worming: 4,
    working: 3,
  },
});

// The second argument (1) is presumably the maximum Levenshtein distance
// allowed for a correction — confirm against the SpellCheck API.
// All three dictionary words are one edit away from 'worling', so by the
// frequency tie-break the most frequent one ('worming') should be returned.
const actual = spellCheck.check(['worling'], 1);
console.log(actual);
const fs = require('fs');
const { SpellCheck } = require('@nlpjs/similarity');
const { NGrams } = require('@nlpjs/utils');
// book.txt must hold the text whose words are to be learnt; this example
// used Pride and Prejudice from Project Gutenberg.
const bookText = fs.readFileSync('./data/book.txt', 'utf-8');
const bookLines = bookText.split(/\r?\n/);

// Build the frequency table from the book's lines and use it as the
// spell checker's dictionary.
const wordCounter = new NGrams({ byWord: true });
const spellCheck = new SpellCheck({
  features: wordCounter.getNGramsFreqs(bookLines, 1),
});

// Print the corrections proposed for three misspelled words.
console.log(spellCheck.check(['knowldge', 'thas', 'prejudize']));