From 57f07a954cab14320baf4e3d219fe2df2ee7c4b1 Mon Sep 17 00:00:00 2001 From: Specy Date: Thu, 12 Sep 2024 14:56:58 +0200 Subject: [PATCH] feat: add typescript bindings using wasm --- .github/workflows/ci.yml | 40 +++++- Cargo.toml | 21 ++- bindings/typescript/.gitignore | 24 ++++ bindings/typescript/README.md | 104 ++++++++++++++ bindings/typescript/build.js | 16 +++ bindings/typescript/package-lock.json | 45 ++++++ bindings/typescript/package.json | 35 +++++ bindings/typescript/src/index.ts | 197 ++++++++++++++++++++++++++ bindings/typescript/src/types.ts | 164 +++++++++++++++++++++ bindings/typescript/src/utils.ts | 126 ++++++++++++++++ bindings/typescript/tsconfig.json | 15 ++ src/automaton.rs | 7 + src/errors.rs | 9 ++ src/grammar.rs | 61 ++++++++ src/lib.rs | 23 ++- src/parser.rs | 84 ++++++++++- src/tables.rs | 12 ++ src/trace.rs | 4 + src/tree.rs | 4 +- src/utils.rs | 40 ++++++ 20 files changed, 1025 insertions(+), 6 deletions(-) create mode 100644 bindings/typescript/.gitignore create mode 100644 bindings/typescript/README.md create mode 100644 bindings/typescript/build.js create mode 100644 bindings/typescript/package-lock.json create mode 100644 bindings/typescript/package.json create mode 100644 bindings/typescript/src/index.ts create mode 100644 bindings/typescript/src/types.ts create mode 100644 bindings/typescript/src/utils.ts create mode 100644 bindings/typescript/tsconfig.json create mode 100644 src/utils.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e5e63c..5eba938 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,11 +88,11 @@ jobs: with: command: test - publish: + publish-crates-io: if: startsWith(github.ref, 'refs/tags/v') needs: [ conformance, test ] - name: Publish + name: Publish to Crates.io runs-on: "ubuntu-latest" steps: - name: Checkout @@ -109,3 +109,39 @@ jobs: run: cargo publish --token ${REGISTRY_TOKEN} env: REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} + + publish-npm: + if: 
startsWith(github.ref, 'refs/tags/v') + needs: [ conformance, test ] + + name: Publish to NPM + runs-on: "ubuntu-latest" + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + override: true + + - name: Setup Node.js + uses: actions/setup-node@v3 + with: + node-version: '20' + + - name: Install Dependencies + working-directory: bindings/typescript # every step starts in the repository root, so a separate `cd` step has no effect + run: npm install + + - name: Build + working-directory: bindings/typescript + run: npm run build + + - name: Publish + working-directory: bindings/typescript + run: npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/Cargo.toml b/Cargo.toml index 493d81f..f7ea3b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ authors = [ [dependencies] clap = { version = "4.5", optional = true, features = ["derive"] } -colored = { version = "2.1" } +console_error_panic_hook = { version = "0.1.1", optional = true } dirs = { version = "5.0", optional = true } indexmap = { version = "2.3" } itertools = { version = "0.13" } @@ -21,9 +21,15 @@ prettytable-rs = { version = "0.10" } ptree = { version = "0.5" } regex = { version = "1.10" } rustyline = { version = "14.0", optional = true } +serde_renamed = { package = "serde", version = "1.0", features = ["derive"], optional = true } +serde-wasm-bindgen = { version = "0.6.5", optional = true } smallvec = { version = "1.13" } smol_str = { version = "0.3" } thiserror = { version = "1.0" } +wasm-bindgen = { version = "0.2.83", optional = true } + +[target.'cfg(not(target_family = "wasm"))'.dependencies] +colored = { version = "2.1" } [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } @@ -31,6 +37,18 @@ criterion = { version = "0.5", features = ["html_reports"] } [features] default = ["repl"] repl = ["clap", "dirs", "rustyline"] +serde = [ + "indexmap/serde", + "serde_renamed", + "smol_str/serde", + "smallvec/serde", +] +wasm = [ + "console_error_panic_hook", + "serde", +
"serde-wasm-bindgen", + "wasm-bindgen", +] [profile.release] lto = "fat" @@ -40,6 +58,7 @@ codegen-units = 1 result_large_err = "allow" [lib] +crate-type = ["cdylib", "rlib"] bench = false doctest = false diff --git a/bindings/typescript/.gitignore b/bindings/typescript/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/bindings/typescript/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/bindings/typescript/README.md b/bindings/typescript/README.md new file mode 100644 index 0000000..e230a95 --- /dev/null +++ b/bindings/typescript/README.md @@ -0,0 +1,104 @@ +## Overview + +`dotlr` is a library for creating and inspecting LR family of parsers in TypeScript. It provides an interface to parse grammars, generate parsing tables, and trace parsing of inputs. The library leverages WebAssembly (WASM) to ensure efficient parsing. + +It is focused on providing educational resources for learning about parsing algorithms and compiler construction. The library is designed to be easy to use and understand, making it ideal for students, educators, and developers interested in language processing. + +### Table of Contents +1. [Installation](#installation) +2. [Basic Usage](#basic-usage) +3. [Defining a Grammar](#defining-a-grammar) +4. [Creating LR(1) Parser of the Grammar](#creating-lr1-parser-of-the-grammar) +5. [Creating LALR(1) Parser of the Grammar](#creating-lalr1-parser-of-the-grammar) + +## Installation + +Before using the `dotlr` library, you need to install it. The following instructions assume you have a project with `npm` already set up. 
+ +```bash +npm install dotlr +``` + +### Importing the Library + +To use the `dotlr` library, import it into your TypeScript files: + +```ts +import { Grammar, LR1Parser, LALR1Parser } from 'dotlr'; +``` +This library uses `ts-results` under the hood to handle errors and results. +```ts +import { Ok, Err } from 'ts-results'; +``` +## Basic Usage + +The core of the `dotlr` library revolves around defining a grammar and using it to create a parser. The following steps will guide you through this process. + +## Defining a Grammar + +A grammar is a set of rules that define how input strings can be parsed. You can create a grammar using the `Grammar.parse()` method. Here's an example: + +For more information on the syntax of the grammar, see [here](https://github.com/umut-sahin/dotlr?tab=readme-ov-file#usage). + +```ts +const grammarStr = ` + S -> A + A -> 'a' A + A -> 'b' +`; + +const grammarResult = Grammar.parse(grammarStr); + +if (grammarResult.ok) { + const grammar = grammarResult.val; + console.log("Grammar successfully parsed!"); + console.log(grammar.getSymbols()); + console.log(grammar.getProductions()); +} else { + console.error("Failed to parse grammar:", grammarResult.val); +} +``` + +- **Grammar.parse()**: Parses a string representation of a grammar and returns a result that holds the `Grammar` object on success or a grammar error on failure. +- **grammar.getSymbols()**: Returns all symbols (non-terminals) used in the grammar; the terminals are available through `grammar.getConstantTokens()` and `grammar.getRegexTokens()`. +- **grammar.getProductions()**: Retrieves the list of productions (rules) defined in the grammar. + +## Creating LR(1) Parser of the Grammar + +The `LR1Parser` class allows you to create an LR(1) parser for the grammar and use it to parse input. 
+ +```ts +const lr1ParserResult = LR1Parser.fromGrammar(grammar); + +if (lr1ParserResult.ok) { + const lr1Parser = lr1ParserResult.val; + + const input = "aab"; + const parseResult = lr1Parser.parse(input); + + if (parseResult.ok) { + const parseTree = parseResult.val; + console.log("Parse successful!"); + console.log(parseTree); + } else { + console.error("Parse error:", parseResult.val); + } +} else { + console.error("Failed to create LR(1) parser:", lr1ParserResult.val); +} +``` + +- **LR1Parser.fromGrammar()**: Consumes the `Grammar` object and returns an `LR1Parser`. The `Grammar` object cannot be reused afterwards; if you still need it, clone it first with `grammar.clone()`. +- **parser.parse()**: Attempts to parse the given input string according to the grammar. Returns a parse tree if successful. +- **parser.trace()**: Traces the parsing process. Returns the step-by-step trace together with the resulting parse tree, if parsing is successful. +- **parser.tokenize()**: Tokenizes the input string. Returns a list of tokens, each paired with the slice of the input it matched. +- **parser.getActionTable()**: Returns the action table of the parser, which is used to determine the next action based on the current state and input token. +- **parser.getGotoTable()**: Returns the goto table of the parser, which is used to determine the next state based on the current state and non-terminal symbol. +- **parser.getParseTables()**: Returns the parsing tables of the parser, which include the action and goto tables. +- **parser.getAutomaton()**: Returns the automaton of the parser, which represents the states and transitions of the LR(1) parser. +- **parser.getFirstTable()**: Returns the first table of the parser, which contains the first sets of symbols. +- **parser.getFollowTable()**: Returns the follow table of the parser, which contains the follow sets of symbols. 
+ +## Creating LALR(1) Parser of the Grammar + +The `LALR1Parser` is similar to the `LR1Parser`, but it uses Look-Ahead LR parsing, the API is the same. diff --git a/bindings/typescript/build.js b/bindings/typescript/build.js new file mode 100644 index 0000000..6ec81f3 --- /dev/null +++ b/bindings/typescript/build.js @@ -0,0 +1,16 @@ +import {execSync} from 'child_process'; +import fs from "fs/promises" + + +async function init() { + console.log("Starting build...") + execSync('tsc', {stdio: 'inherit'}); + await fs.cp("./src/pkg", "./dist/pkg", {recursive: true}); + await fs.unlink("./dist/pkg/package.json"); + await fs.unlink("./dist/pkg/README.md"); + await fs.unlink("./dist/pkg/.gitignore"); + console.log("Build complete") + +} + +init() diff --git a/bindings/typescript/package-lock.json b/bindings/typescript/package-lock.json new file mode 100644 index 0000000..51a70c6 --- /dev/null +++ b/bindings/typescript/package-lock.json @@ -0,0 +1,45 @@ +{ + "name": "dotlr", + "version": "0.8.5", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "dotlr", + "version": "0.8.5", + "dependencies": { + "fuse.js": "^7.0.0", + "ts-results": "^3.3.0" + }, + "devDependencies": { + "typescript": "^5.5.0" + } + }, + "node_modules/fuse.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz", + "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==", + "engines": { + "node": ">=10" + } + }, + "node_modules/ts-results": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/ts-results/-/ts-results-3.3.0.tgz", + "integrity": "sha512-FWqxGX2NHp5oCyaMd96o2y2uMQmSu8Dey6kvyuFdRJ2AzfmWo3kWa4UsPlCGlfQ/qu03m09ZZtppMoY8EMHuiA==" + }, + "node_modules/typescript": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", + "integrity": 
"sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + } + } +} diff --git a/bindings/typescript/package.json b/bindings/typescript/package.json new file mode 100644 index 0000000..4abfa38 --- /dev/null +++ b/bindings/typescript/package.json @@ -0,0 +1,35 @@ +{ + "type": "module", + "name": "dotlr", + "description": "An LR(1) parser generator and visualizer created for educational purposes.", + "keywords": [ + "educational", + "lalr-parsing", + "lr-parsing", + "parser-generator" + ], + "license": "MIT OR Apache-2.0", + "version": "0.1.18", + "main": "dist/index.js", + "exports": { + ".": "./dist/index.js", + "./types": "./dist/types.js", + "./utils": "./dist/utils.js" + }, + "module": "dist/index.js", + "typings": "dist/index.d.ts", + "repository": "https://github.com/umut-sahin/dotlr", + "scripts": { + "build": "npm i -g wasm-pack && npm run build-all", + "build-all": "npm run build-wasm:no-test && npm run build-lib", + "build-wasm": "cd .. && cargo test && wasm-pack build --out-dir bindings/typescript/src/pkg --out-name dotlr --features wasm --no-default-features", + "build-wasm:no-test": "cd .. 
&& wasm-pack build --out-dir bindings/typescript/src/pkg --out-name dotlr --features wasm --no-default-features", + "build-lib": "node build.js" + }, + "devDependencies": { + "typescript": "^5.5.0" + }, + "dependencies": { + "ts-results": "^3.3.0" + } +} diff --git a/bindings/typescript/src/index.ts b/bindings/typescript/src/index.ts new file mode 100644 index 0000000..9ebc5e7 --- /dev/null +++ b/bindings/typescript/src/index.ts @@ -0,0 +1,197 @@ +import {Grammar as _Grammar, Parser as _Parser,} from './pkg/dotlr' +import { + ActionTable, + Automaton, + FirstTable, + FollowTable, + GoToTable, + GrammarError, + ParserError, + ParsingError, + ParsingTables, + Rule, + Token, + Trace, + Tree +} from './types' +import {Err, Ok} from "ts-results"; + +export class Grammar< + T extends string = string, + NT extends string = string, + R extends string = string +> { + grammar: _Grammar + private cache = { + symbols: null as NT[] | null, + constant_tokens: null as T[] | null, + start_symbol: null as NT | null, + regex_tokens: null as Map | null, + productions: null as Rule>[] | null, + stringify: null as string | null + } + + private constructor(grammar: _Grammar) { + this.grammar = grammar + } + + static parse(grammar: string) { + try { + const res = _Grammar.parse_wasm(grammar) + return Ok(new Grammar(res)) + } catch (e) { + return Err(e as GrammarError) + } + } + + getSymbols() { + return this.cache.symbols ??= this.grammar.symbols_wasm() as NT[] + } + + getConstantTokens() { + return this.cache.constant_tokens ??= this.grammar.constant_tokens_wasm() as T[] + } + + getStartSymbol() { + return this.cache.start_symbol ??= this.grammar.start_symbol_wasm() as NT + } + + getProductions() { + return this.cache.productions ??= this.grammar.rules_wasm() as Rule>[] + } + + getRegexTokens() { + return this.cache.regex_tokens ??= this.grammar.regular_expressions_wasm() as Map + } + + stringify() { + return this.cache.stringify ??= this.grammar.to_string_wasm() as string + } + + 
clone() { + return new Grammar(this.grammar.clone_wasm()) + } +} + + +class Parser< + T extends string = string, + NT extends string = string, + R extends string = string +> { + + private parser: _Parser + private cache = { + action_table: null as ActionTable> | null, + goto_table: null as GoToTable | null, + parsing_tables: null as ParsingTables> | null, + automaton: null as Automaton> | null, + first_table: null as FirstTable> | null, + follow_table: null as FollowTable> | null, + } + + constructor(parser: _Parser) { + this.parser = parser + } + + parse(input: string) { + try { + return Ok(this.parser.parse_wasm(input) as Tree>) + } catch (e) { + return Err(e as ParsingError) + } + } + + getActionTable() { + return this.cache.action_table ??= this.parser.action_table_wasm() as ActionTable> + } + + getGotoTable() { + return this.cache.goto_table ??= this.parser.goto_table_wasm() as GoToTable + } + + getParseTables() { + return this.cache.parsing_tables ??= this.parser.parsing_tables_wasm() as ParsingTables> + } + + getAutomaton() { + return this.cache.automaton ??= this.parser.automaton_wasm() as Automaton> + } + + getFirstTable() { + return this.cache.first_table ??= this.parser.first_table_wasm() as FirstTable> + } + + getFollowTable() { + return this.cache.follow_table ??= this.parser.follow_table_wasm() as FollowTable> + } + + tokenize(input: string) { + try { + const tokens = this.parser.tokenize_wasm(input) as [Token, string][] + return Ok(tokens.map(([token, slice]) => ({ + token, slice + }))) + } catch (e) { + return Err(e as ParsingError) + } + } + + trace(input: string) { + try { + const [trace, tree] = this.parser.trace_wasm(input) as [Trace>>, Tree>] + return Ok({ + trace, + tree + }) + } catch (e) { + return Err(e as ParsingError) + } + } +} + +class LRParser { + //TODO +} + +export class LR1Parser< + T extends string = string, + NT extends string = string, + R extends string = string +> extends Parser { + private constructor(parser: _Parser) { + 
super(parser) + } + + /** + * Consumes a grammar and returns a parser, the grammar is consumed and the ownership is transferred to the parser + */ + static fromGrammar(grammar: G) { + try { + return Ok(new LR1Parser(_Parser.new_wasm(grammar.grammar))) + } catch (e) { + return Err(e as ParserError) + } + } +} + +export class LALR1Parser< + T extends string = string, + NT extends string = string, + R extends string = string +> extends Parser { + private constructor(parser: _Parser) { + super(parser) + } + + /** + * Consumes a grammar and returns an LALR(1) parser (built with `new_lalr_wasm`, not the LR(1) constructor `new_wasm`); ownership of the grammar is transferred to the parser, so clone it first if it is still needed. Returns `Err` with the parser error if construction fails + */ + static fromGrammar(grammar: G) { + try { + return Ok(new LALR1Parser(_Parser.new_lalr_wasm(grammar.grammar))) + } catch (e) { + return Err(e as ParserError) + } + } +} \ No newline at end of file diff --git a/bindings/typescript/src/types.ts b/bindings/typescript/src/types.ts new file mode 100644 index 0000000..888b99f --- /dev/null +++ b/bindings/typescript/src/types.ts @@ -0,0 +1,164 @@ +//TODO not sure how to type Symbol +export type Rule = { + symbol: string, + pattern: AtomicPattern[] +} + + +//TODO not sure how to type Symbol +export type AtomicPattern = { + type: 'Symbol', + value: string +} | { + type: 'Token', + value: T +} + + +export type Tree = { + type: 'Terminal' + value: { + token: T, + slice: string + } +} | { + type: 'NonTerminal' + value: { + symbol: NT, + pattern: Tree[] + } +} + +export type Token = { + type: 'Constant' + value: C +} | { + type: 'Regex', + value: R +} | { + type: 'Eof' +} + +export type GrammarError = { + type: "UnexpectedToken", + value: { + line: number, + column: number, + token: string + expected: string[] + } +} | { + type: "UnexpectedEof", + value: { + expected: string[] + } +} | { + type: "InvalidRegex", + value: { + line: number, + column: number, + regex: string + } +} + +export type ParserError = { + type: "EmptyGrammar" +} | { + type: "UndefinedSymbol", + value: { + symbol: string + rule: Rule + } 
+} | { + type: "UndefinedRegexToken", + value: { + regex_token: string + rule: Rule + } +} | { + type: "Conflict", + value: { + parser: { //TODO all the other types should have a Serialized* version, i'd move this to the lib + grammar: any + first_table: any + follow_table: any + automaton: any + parsing_tables: any + } + state: number, + token: T, + } +} + +export type ParsingError = { + type: "UnknownToken", + value: { + token: string + } +} | { + type: "UnexpectedToken" + value: { + token: string + expected: T[] + } +} | { + type: "UnexpectedEof" + value: { + expected: T[] + } +} + + +export type Trace = { + steps: Step[] +} +export type Step = { + state_stack: number[] + tree_stack: Tr[] + remaining_tokens: Tr extends Tree ? T[] : never + action_taken: Action +} +export type Item = { + rule: Rule, + dot: number, + lookahead: T[] +} +export type State = { + id: number + items: Item[] + transitions: Map, number> +} +export type Automaton = { + states: State[] +} + +export type Action = { + type: 'Shift', + value: { + next_state: number + } +} | { + type: 'Reduce', + value: { + rule_index: number + } +} | { + type: 'Accept', + value: { + rule_index: number + } +} + + +export type FirstTable = Map + +export type FollowTable = Map + +export type GoToTable = Map[] + +export type ActionTable = Map[] + +export type ParsingTables = { + action_table: ActionTable + goto_table: GoToTable +} \ No newline at end of file diff --git a/bindings/typescript/src/utils.ts b/bindings/typescript/src/utils.ts new file mode 100644 index 0000000..6bc180e --- /dev/null +++ b/bindings/typescript/src/utils.ts @@ -0,0 +1,126 @@ +import type {Action, AtomicPattern, GrammarError, Item, ParserError, ParsingError, Rule, Token, Tree} from "./types"; + +export function stringifyToken(token: Token, noApostrophes = false) { + if (token.type === 'Eof') return "$" + if (token.type === "Regex") return `%${token.value}` + if (token.type === "Constant") return noApostrophes ? 
token.value : `'${token.value}'` + return "" +} + +export function stringifyAtom(atom: AtomicPattern, noApostrophes = false) { + if (atom.type === 'Symbol') return atom.value + if (atom.type === 'Token') return stringifyToken(atom.value, noApostrophes) + return "" +} + +export function stringifyItem(item: Item, noApostrophes = false) { + const children = item.rule.pattern.map((a) => stringifyAtom(a, noApostrophes)) + //inserts the dot + children.splice(item.dot, 0, '.') + return `${item.rule.symbol} -> ${children.join(' ')}` +} + +export function stringifyRule(rule: Rule, noApostrophes = false) { + const children = rule.pattern.map(a => stringifyAtom(a, noApostrophes)) + return `${rule.symbol} -> ${children.join(' ')}` +} + +export function stringifyLookahead(item: Token[], noApostrophes = false) { + const children = item.map(t => stringifyToken(t, noApostrophes)) + return children.join(" ") +} + + +export function stringifyAction(action: Action) { + if (action.type === 'Accept') return `a${action.value.rule_index + 1}` + if (action.type === 'Reduce') return `r${action.value.rule_index + 1}` + if (action.type === 'Shift') return `s${action.value.next_state}` + return "" +} + + +export function stringifyActionVerbose(action: Action, rules: Rule[], noApostrophes: boolean = false) { + if (action.type === "Shift") { + return `Shift ${action.value.next_state}` + } else if (action.type === "Accept") { + return `Accept ${action.value.rule_index + 1} (${stringifyRule(rules[action.value.rule_index], noApostrophes)})` + } else if (action.type === "Reduce") { + return `Reduce ${action.value.rule_index + 1} (${stringifyRule(rules[action.value.rule_index], noApostrophes)})` + } + return "" +} + +export function stringifyTreeStack(tree: Tree[], noApostrophes = false): string[] { + return tree.map(i => { + if (i.type === "Terminal") return stringifyToken(i.value.token, noApostrophes) + if (i.type === "NonTerminal") return i.value.symbol + }) +} + + +export function 
stringifyTree(tree: Tree, indent: string = '', isLast: boolean = true): string { + const linePrefix = isLast ? '└─ ' : '├─ '; + let result = ''; + + if (tree.type === 'Terminal') { + const {token, slice} = tree.value; + if (token.type !== 'Eof') { + result += `${indent}${linePrefix}${token.value} [${slice}]\n`; + } + } else { + const {symbol, pattern} = tree.value; + result += `${indent}${linePrefix}${symbol}\n`; + + const newIndent = indent + (isLast ? ' ' : '│ '); + pattern.forEach((child, index) => { + result += stringifyTree(child, newIndent, index === pattern.length - 1); + }); + } + + return result; +} + + +export function stringifyGrammarError(e: GrammarError) { + if (e.type === "UnexpectedToken") { + return `Unexpected token, expected one of:\n${e.value.expected.map(maybeToken).join(', ')}` + } else if (e.type === "UnexpectedEof") { + return `Unexpected end of input, expected one of:\n${e.value.expected.map(maybeToken).join(', ')}` + } else if (e.type === 'InvalidRegex') { + return `Invalid regular expression\n${e.value.regex}` + } + return "Unknown error" +} + +function maybeToken(token: Token|string){ + return typeof token === 'string' ? 
token : stringifyToken(token) +} + +export function stringifyParsingError(error: ParsingError){ + if (error.type === "UnexpectedEof") { + return `Unexpected end of input, expected one of:\n${error.value.expected.map(maybeToken).join(", ")}` + } else if (error.type === 'UnknownToken') { + return `Unknown token: ${error.value.token}` + } else if (error.type === "UnexpectedToken") { + return `Unexpected token, expected one of:\n${error.value.expected.map(maybeToken).join(', ')}` + } + return "Unknown error" +} + +export function stringifyParserError(error: ParserError){ + if(error.type === "EmptyGrammar") return "Empty grammar" + if(error.type === "UndefinedSymbol") return `Undefined symbol: ${error.value.symbol}` + if(error.type === "UndefinedRegexToken") return `Undefined regex token: ${error.value.regex_token}` + if(error.type === "Conflict") return `Conflict in state ${error.value.state} on token ${stringifyToken(error.value.token)}` + return "Unknown error" +} + +export function stringifyError(error: GrammarError | ParsingError | ParserError){ + const s = stringifyGrammarError(error as GrammarError) + const s2 = stringifyParsingError(error as ParsingError) + const s3 = stringifyParserError(error as ParserError) + if([s, s2, s3].every(s => s === "Unknown error")) return "Unknown error" + if(s !== "Unknown error") return s + if(s2 !== "Unknown error") return s2 + return s3 +} diff --git a/bindings/typescript/tsconfig.json b/bindings/typescript/tsconfig.json new file mode 100644 index 0000000..3ee6c3a --- /dev/null +++ b/bindings/typescript/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "esnext", + "moduleResolution": "node", + "lib": [ + "esnext", + "dom" + ], + "declaration": true, + "esModuleInterop": true, + "outDir": "dist", + "rootDir": "./src", + } +} \ No newline at end of file diff --git a/src/automaton.rs b/src/automaton.rs index 1729bc2..3a68824 100644 --- a/src/automaton.rs +++ b/src/automaton.rs @@ -2,6 +2,8 
@@ use crate::prelude::*; /// Item of a state of an LR(1) automaton. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug, Eq, PartialEq)] pub struct Item { rule: Rule, @@ -60,6 +62,8 @@ impl Display for Item { /// State of an LR(1) automaton. #[derive(Clone, Debug, Default, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] pub struct State { id: usize, items: SmallVec<[Item; 2]>, @@ -181,6 +185,9 @@ impl PartialEq for State { /// LR(1) automaton of a grammar. +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Debug)] pub struct Automaton { states: Vec, diff --git a/src/errors.rs b/src/errors.rs index 134ac1f..c3a6312 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -2,6 +2,9 @@ use crate::prelude::*; /// Grammar error of a grammar string tried to be converted to a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Debug, Error)] pub enum GrammarError { /// An unexpected token has been encountered. @@ -47,6 +50,9 @@ pub enum GrammarError { /// Parser error of a parser tried to be constructed from a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Debug, Error)] pub enum ParserError { /// An empty grammar is tried to be parsed. @@ -80,6 +86,9 @@ pub enum ParserError { /// Parsing error of an input tried to be parsed with a parser. 
+#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Debug, Error)] pub enum ParsingError { /// An unknown token has been encountered. diff --git a/src/grammar.rs b/src/grammar.rs index 5ffe640..2056a49 100644 --- a/src/grammar.rs +++ b/src/grammar.rs @@ -2,6 +2,8 @@ use crate::prelude::*; /// Symbol (e.g., `S`, `E`) in a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct Symbol(SmolStr); @@ -27,6 +29,8 @@ impl> From for Symbol { /// Constant token (e.g., `'+'`, `'-'`) in a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct ConstantToken(SmolStr); @@ -52,6 +56,8 @@ impl> From for ConstantToken { /// Regular expression token (e.g., `%f`, `%s`) in a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct RegexToken(SmolStr); @@ -77,6 +83,9 @@ impl> From for RegexToken { /// Token (e.g., `'+'`, `%f`, `$`) in a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Token { /// Constant token. @@ -117,6 +126,9 @@ impl From for Token { /// Elements (e.g., `E`, `'+'`, `%f`) of the pattern of a rule. 
+#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum AtomicPattern { /// Symbol to match. @@ -160,6 +172,8 @@ impl From for AtomicPattern { /// Rule (e.g., `S -> E` `E -> F '+' E`) of a grammar. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug, Eq, PartialEq)] pub struct Rule { symbol: Symbol, @@ -200,15 +214,20 @@ impl Display for Rule { /// Grammar of a language. +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Clone, Debug)] pub struct Grammar { symbols: IndexSet, start_symbol: Symbol, constant_tokens: IndexSet, + #[cfg_attr(feature = "serde", serde(serialize_with = "serialize_regex_map"))] regular_expressions: IndexMap, rules: Vec, } + impl Grammar { /// Creates a grammar from a grammar string. pub fn parse(grammar_string: &str) -> Result { @@ -216,6 +235,18 @@ impl Grammar { } } +#[cfg(feature = "wasm")] +#[cfg_attr(feature = "wasm", wasm_bindgen)] +impl Grammar { + pub fn parse_wasm(grammar_string: &str) -> Result { + match Grammar::parse(grammar_string) { + Ok(grammar) => Ok(grammar), + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } +} + + impl Grammar { /// Gets the symbols of the grammar. pub fn symbols(&self) -> &IndexSet { @@ -243,6 +274,36 @@ impl Grammar { } } +#[cfg(feature = "wasm")] +#[cfg_attr(feature = "wasm", wasm_bindgen)] +impl Grammar { + pub fn symbols_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.symbols)?) + } + pub fn start_symbol_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.start_symbol)?) + } + pub fn rules_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.rules)?) 
+ } + pub fn to_string_wasm(&self) -> String { + self.to_string() + } + pub fn constant_tokens_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.constant_tokens)?) + } + + pub fn regular_expressions_wasm(&self) -> Result { + let index_map: IndexMap = + self.regular_expressions.iter().map(|(k, v)| (k.clone(), v.to_string())).collect(); + Ok(serde_wasm_bindgen::to_value(&index_map)?) + } + pub fn clone_wasm(&self) -> Grammar { + self.clone() + } +} + + impl Display for Grammar { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for rule in self.rules.iter() { diff --git a/src/lib.rs b/src/lib.rs index 8938b0b..f9cfcab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ mod parser; mod tables; mod trace; mod tree; +mod utils; pub use { automaton::{ @@ -43,9 +44,24 @@ pub use { }; mod prelude { + #[cfg(feature = "serde")] + pub use { + serde_renamed::Serialize, + utils::serialize_regex_map, + }; + + + #[cfg(feature = "wasm")] + pub use wasm_bindgen::prelude::*; + + #[cfg(not(target_family = "wasm"))] + pub use colored::*; + #[cfg(target_family = "wasm")] + pub use utils::MockColored; + + pub use { super::*, - colored::Colorize, indexmap::{ IndexMap, IndexSet, @@ -88,4 +104,9 @@ mod prelude { }, thiserror::Error, }; + #[cfg(feature = "serde")] + pub use { + serde_renamed::ser::SerializeMap, + serde_renamed::Serializer, + }; } diff --git a/src/parser.rs b/src/parser.rs index 7f91410..09c003c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,9 @@ use crate::prelude::*; - /// LR(1) parser of a grammar. +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Debug)] pub struct Parser { grammar: Grammar, @@ -11,6 +13,7 @@ pub struct Parser { parsing_tables: ParsingTables, } + impl Parser { /// Crates an LR(1) parser of a grammar. 
pub fn lr(grammar: Grammar) -> Result { @@ -38,6 +41,23 @@ impl Parser { parser.check_conflicts_internal() } } +#[cfg(feature = "wasm")] +#[cfg_attr(feature = "wasm", wasm_bindgen)] +impl Parser { + pub fn new_wasm(grammar: Grammar) -> Result { + match Parser::lr(grammar) { + Ok(parser) => Ok(parser), + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } + pub fn new_lalr_wasm(grammar: Grammar) -> Result { + match Parser::lalr(grammar) { + Ok(parser) => Ok(parser), + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } +} + impl Parser { /// Gets the grammar of the parser. @@ -76,6 +96,29 @@ impl Parser { } } +#[cfg(feature = "wasm")] +#[cfg_attr(feature = "wasm", wasm_bindgen)] +impl Parser { + pub fn first_table_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.first_table)?) + } + pub fn follow_table_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.follow_table)?) + } + pub fn automaton_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.automaton)?) + } + pub fn parsing_tables_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.parsing_tables)?) + } + pub fn action_table_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.parsing_tables.action_table())?) + } + pub fn goto_table_wasm(&self) -> Result { + Ok(serde_wasm_bindgen::to_value(&self.parsing_tables.goto_table())?) + } +} + impl Parser { /// Tokenizes an input into a stream of tokens and their corresponding input slices. 
pub fn tokenize<'i>(&self, input: &'i str) -> Result, ParsingError> { @@ -133,6 +176,44 @@ impl Parser { } } +#[cfg(feature = "wasm")] +#[cfg_attr(feature = "wasm", wasm_bindgen)] +impl Parser { + pub fn tokenize_wasm(&self, input: &str) -> Result { + match self.tokenize(input) { + Ok(tokens) => Ok(serde_wasm_bindgen::to_value(&tokens)?), + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } + pub fn parse_wasm(&self, input: &str) -> Result { + let tokens = self.tokenize(input); + let tokens = match tokens { + Ok(tokens) => tokens, + Err(error) => return Err(serde_wasm_bindgen::to_value(&error)?), + }; + match self.parse(tokens) { + Ok(tree) => Ok(serde_wasm_bindgen::to_value(&tree)?), + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } + pub fn trace_wasm(&self, input: &str) -> Result, JsValue> { + let tokens = self.tokenize(input); + let tokens = match tokens { + Ok(tokens) => tokens, + Err(error) => return Err(serde_wasm_bindgen::to_value(&error)?), + }; + match self.trace(tokens) { + Ok((trace, tree)) => { + let trace = serde_wasm_bindgen::to_value(&trace)?; + let tree = serde_wasm_bindgen::to_value(&tree)?; + Ok(vec![trace, tree]) + }, + Err(error) => Err(serde_wasm_bindgen::to_value(&error)?), + } + } +} + + impl Parser { /// Internal grammar checks. fn check_grammar_internal(grammar: &Grammar) -> Result<(), ParserError> { @@ -274,6 +355,7 @@ impl Parser { } } + impl Parser { /// Dumps the parser to stdout. pub fn dump(&self) { diff --git a/src/tables.rs b/src/tables.rs index 08dfb99..c26fd52 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -2,6 +2,9 @@ use crate::prelude::*; /// First table of the symbols in a grammar. +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Debug)] pub struct FirstTable(IndexMap>); @@ -72,6 +75,9 @@ impl Deref for FirstTable { /// Follow table of the symbols in a grammar. 
+#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Debug)] pub struct FollowTable(IndexMap>); @@ -173,6 +179,9 @@ impl Deref for FollowTable { /// Action (e.g., `Shift 3`, `Reduce 2`, `Accept 1`) to perform during a parsing step. +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Action { /// Shift the first remaining input token into symbol stack and transition to a new state. @@ -204,6 +213,9 @@ impl Display for Action { /// Action and goto tables of a parser. +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] #[derive(Debug)] pub struct ParsingTables { action_table: Vec>>, diff --git a/src/trace.rs b/src/trace.rs index d3f9829..d7da69a 100644 --- a/src/trace.rs +++ b/src/trace.rs @@ -3,6 +3,8 @@ use crate::prelude::*; /// Step of a parsing trace. #[derive(Debug)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] pub struct Step<'i> { pub(crate) state_stack: Vec, pub(crate) tree_stack: Vec>, @@ -35,6 +37,8 @@ impl<'i> Step<'i> { /// Trace of a parse. #[derive(Debug, Default)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] pub struct Trace<'i> { steps: Vec>, } diff --git a/src/tree.rs b/src/tree.rs index 707e2a5..c2bee2c 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -3,8 +3,10 @@ use { std::io::BufWriter, }; - /// Parse tree of a parsed input. 
+#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "serde", serde(crate = "serde_renamed"))] +#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))] #[derive(Clone, Debug)] pub enum Tree<'i> { /// Terminal node. diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..2c50045 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,40 @@ +use crate::prelude::*; + + +/// The `colored` crate uses OS specific features to colorize the output, which are not available in +/// the WASM target. This trait provides a mock implementation of the `colored` crate for the WASM target. +#[cfg(target_family = "wasm")] +pub trait MockColored { + fn green(&self) -> String; + fn cyan(&self) -> String; + fn bold(&self) -> String; +} + +#[cfg(target_family = "wasm")] +impl> MockColored for T { + fn green(&self) -> String { + self.as_ref().to_owned() + } + fn cyan(&self) -> String { + self.as_ref().to_owned() + } + fn bold(&self) -> String { + self.as_ref().to_owned() + } +} + +/// Serialize a map of regex objects to a map of regex strings. +#[cfg(feature = "serde")] +pub fn serialize_regex_map( + map: &IndexMap, + serializer: S, +) -> Result +where + S: Serializer, +{ + let mut map_serializer = serializer.serialize_map(Some(map.len()))?; + for (key, value) in map { + map_serializer.serialize_entry(key, &value.to_string())?; + } + map_serializer.end() +}