Skip to content

Commit

Permalink
Manually factor left recursion into right recursion in the grammar file for a more useful parse tree, 15x smaller grammar file, and faster parsing time
Browse files Browse the repository at this point in the history
  • Loading branch information
adamziel committed Aug 18, 2024
1 parent c8652d5 commit 137d6ca
Show file tree
Hide file tree
Showing 17 changed files with 40,700 additions and 322,591 deletions.
4 changes: 2 additions & 2 deletions custom-parser/grammar-factoring/1-ebnf-to-json.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ import fs from 'fs';

const filePath = process.argv[2] || 'MySQLFull.ebnf';
let grammar = fs.readFileSync(filePath, 'utf8');
grammar = grammar.replaceAll('%', 'fragment__F')
let RULES = Grammars.W3C.getRules(grammar);

console.log(JSON.stringify(RULES, null, 2));
console.log(JSON.stringify(RULES, null, 2).replaceAll('fragment__F', '%'));
11 changes: 8 additions & 3 deletions custom-parser/grammar-factoring/2-cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import sys
import argparse
from ebnfutils import eliminate_left_recursion, encode_as_ebnf, factor_common_prefixes
from ebnfutils import eliminate_left_recursion, encode_as_ebnf, factor_common_prefixes, expand_grammar

class CustomArgumentParser(argparse.ArgumentParser):
def error(self, message):
Expand All @@ -14,7 +14,7 @@ def error(self, message):
parser.add_argument(
'mode',
type=str,
choices=['lr', 'cp', 'all'],
choices=['lr', 'expand', 'cp', 'all'],
help=(
'Specify the mode. Options are:\n'
"* 'lr' for left recursion elimination\n"
Expand Down Expand Up @@ -48,9 +48,10 @@ def error(self, message):
# print(f"Selected mode: {args.mode}")
# print(f"Filename: {args.filename}")

if args.filename is None or args.mode not in ["lr", "cp", "all"]:
if args.filename is None or args.mode not in ["expand", "lr", "cp", "all"]:
print("Usage: python ebnf-to-right-recursive.py <mode> <filename> [--format json|ebnf]")
print("Mode can be one of:")
print("* 'expand' for expansion of * ? + symbols")
print("* 'lr' for left recursion elimination")
print("* 'cp' for factoring common prefixes")
print("* 'all' for both")
Expand All @@ -68,6 +69,10 @@ def error(self, message):
sys.exit(1)

# Apply the requested transformations in order; mode "all" enables every
# step shown here.
updated_grammar = input_grammar

# Expansion of the * ? + symbols. expand_grammar() returns a pair; only the
# rewritten grammar is carried forward in this section.
if args.mode in ("expand", "all"):
    grammar, new_rules = expand_grammar(updated_grammar)
    updated_grammar = grammar

# Left recursion elimination.
if args.mode in ("lr", "all"):
    updated_grammar = eliminate_left_recursion(updated_grammar)

Expand Down
15 changes: 15 additions & 0 deletions custom-parser/grammar-factoring/3-phpize-grammar.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ function export_as_php_var($var) {
$compressed_grammar[$rule_index_by_name[$rule["name"]]] = $new_branches;
}

// Compress the fragment rule names – they take a lot of disk space and are
// inlined in the final parse tree anyway.
//
// A rule is treated as a fragment when its name starts with '%' or ends with
// '_zero_or_one', '_zero_or_more' or '_one_or_more'. Each matching entry of
// $rules_ids is overwritten with a short sequential alias: '%f1', '%f2', ...
// All other names are left untouched.
$last_fragment = 1;
foreach ($rules_ids as $id => $name) {
    // str_starts_with() rather than $name[0]: indexing an empty string raises
    // an "Uninitialized string offset" warning, whereas this just yields false.
    if (
        str_starts_with($name, '%') ||
        str_ends_with($name, '_zero_or_one') ||
        str_ends_with($name, '_zero_or_more') ||
        str_ends_with($name, '_one_or_more')
    ) {
        // foreach iterates by value, so writing back by key is safe here.
        $rules_ids[$id] = '%f' . $last_fragment;
        ++$last_fragment;
    }
}

$full_grammar = [
"rules_offset" => $rules_offset,
"rules_names" => $rules_ids,

Check warning on line 81 in custom-parser/grammar-factoring/3-phpize-grammar.php

View workflow job for this annotation

GitHub Actions / Check code style

Array double arrow not aligned correctly; expected 2 space(s) between ""rules_names"" and double arrow, but found 1.

Check warning on line 81 in custom-parser/grammar-factoring/3-phpize-grammar.php

View workflow job for this annotation

GitHub Actions / Check code style

Array double arrow not aligned correctly; expected 2 space(s) between ""rules_names"" and double arrow, but found 1.
Expand Down
Loading

0 comments on commit 137d6ca

Please sign in to comment.