-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
403,896 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import { Grammars, Parser } from 'ebnf'; | ||
import fs from 'fs'; | ||
|
||
// Path of the EBNF grammar to convert: first CLI argument, falling back to
// MySQLFull.ebnf (resolved against the current working directory).
const filePath = process.argv[2] || 'MySQLFull.ebnf';
const grammar = fs.readFileSync(filePath, 'utf8');

// Parse the grammar text with the ebnf library's W3C grammar flavour.
const rules = Grammars.W3C.getRules(grammar);

// Emit the parsed rules as pretty-printed JSON on stdout.
console.log(JSON.stringify(rules, null, 2));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import json | ||
import sys | ||
import argparse | ||
from ebnfutils import eliminate_left_recursion, encode_as_ebnf, factor_common_prefixes | ||
|
||
class CustomArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that prints the full help text before failing."""

    def error(self, message):
        """Print the complete help to stderr, then exit with status 2.

        Args:
            message: Description of the argument error, as supplied by argparse.
        """
        self.print_help(sys.stderr)
        self.exit(2, f"{self.prog}: error: {message}\n")
|
||
# RawTextHelpFormatter keeps the explicit line breaks in the help strings;
# the default formatter would re-wrap the bullet list below into a single
# paragraph.
parser = CustomArgumentParser(
    description="Processes the parser grammar.",
    formatter_class=argparse.RawTextHelpFormatter,
)

# Add the mode positional argument
parser.add_argument(
    'mode',
    type=str,
    choices=['lr', 'cp', 'all'],
    help=(
        'Specify the mode. Options are:\n'
        "* 'lr' for left recursion elimination\n"
        "* 'cp' for factoring common prefixes\n"
        "* 'all' for both"
    )
)

# Add the filename positional argument
parser.add_argument(
    'filename',
    type=str,
    help='Specify the filename.'
)

# Add the format argument (optional flag, defaults to JSON output)
parser.add_argument(
    '--format',
    type=str,
    choices=['json', 'ebnf'],
    default='json',
    help='Specify the output format. Options are: json, ebnf.'
)

# Parse the arguments (reads sys.argv; exits with status 2 on invalid input)
args = parser.parse_args()
|
||
# Print the parsed values | ||
# print(f"Selected format: {args.format}") | ||
# print(f"Selected mode: {args.mode}") | ||
# print(f"Filename: {args.filename}") | ||
|
||
# No manual validation is needed here: both positionals are required and
# `mode` is restricted through `choices`, so argparse has already rejected
# any invalid invocation before this point.

# Load the grammar from the JSON file given on the command line.
try:
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(args.filename, encoding="utf-8") as fp:
        input_grammar = json.load(fp)
except Exception as e:
    print(e, file=sys.stderr)
    print(f"Failed to load grammar from {args.filename}", file=sys.stderr)
    sys.exit(1)

updated_grammar = input_grammar
if args.mode in ("lr", "all"):
    updated_grammar = eliminate_left_recursion(updated_grammar)

# NOTE(review): common-prefix factoring is currently disabled, so mode 'cp'
# leaves the grammar unchanged and 'all' only eliminates left recursion.
# if args.mode == "cp" or args.mode == "all":
#     updated_grammar = factor_common_prefixes(updated_grammar, passes=1)

# Write the result to stdout in the requested format.
if args.format == "json":
    print(json.dumps(updated_grammar, indent=2))
else:
    print(encode_as_ebnf(updated_grammar))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
<?php | ||
|
||
// The grammar JSON path is mandatory. The usage text goes to STDERR because
// STDOUT carries the generated PHP source emitted at the end of this script.
if ($argc < 2) {
    fwrite(STDERR, "Usage: php {$argv[0]} <grammar.json>\n");
    exit(1);
}
|
||
/**
 * Renders a value as compact PHP source code.
 *
 * Arrays are written in short syntax without any whitespace. Keys are emitted
 * only when the array is not a 0-based sequential list. Every non-array value
 * is delegated to var_export().
 *
 * @param mixed $var Value to render.
 * @return string PHP expression that evaluates to $var.
 */
function export_as_php_var($var) {
    if (!is_array($var)) {
        return var_export($var, true);
    }

    // Direct key comparison; an empty array never matches range(0, -1), which
    // is harmless because it produces no items either way.
    $is_list = array_keys($var) === range(0, count($var) - 1);

    $items = [];
    foreach ($var as $key => $value) {
        $prefix = $is_list ? "" : var_export($key, true) . "=>";
        $items[] = $prefix . export_as_php_var($value);
    }
    return "[" . implode(",", $items) . "]";
}
|
||
// Load the grammar JSON named on the command line, failing loudly instead of
// iterating over a null/false value further down.
$grammar_json = file_get_contents($argv[1]);
if ($grammar_json === false) {
    fwrite(STDERR, "Failed to read grammar file: {$argv[1]}\n");
    exit(1);
}
$grammar = json_decode($grammar_json, true);
if (!is_array($grammar)) {
    fwrite(STDERR, "Failed to decode grammar JSON from {$argv[1]}: " . json_last_error_msg() . "\n");
    exit(1);
}
require_once __DIR__ . '/../parser/MySQLLexer.php';

// Lookup tables
$rules_offset = 2000;
$rules_ids = [];            // rule index => rule name
$rule_id_by_name = [];      // rule name => rule index + $rules_offset
$rule_index_by_name = [];   // rule name => rule index
foreach ($grammar as $rule) {
    $rule_index = count($rules_ids);
    $rules_ids[] = $rule["name"];
    $rule_index_by_name[$rule["name"]] = $rule_index;
    $rule_id_by_name[$rule["name"]] = $rule_index + $rules_offset;
}

// Convert rules ids and token ids to integers
$compressed_grammar = [];
foreach ($grammar as $rule) {
    $new_branches = [];
    foreach ($rule["bnf"] as $branch) {
        $new_branch = [];
        foreach ($branch as $name) {
            // Any symbol that is not a known rule name is treated as a terminal.
            if (!isset($rule_id_by_name[$name])) {
                $new_branch[] = MySQLLexer::getTokenId($name);
            } else {
                // Use rule id to avoid conflicts with token ids
                $new_branch[] = $rule_id_by_name[$name];
            }
        }
        $new_branches[] = $new_branch;
    }
    // Use rule index
    $compressed_grammar[$rule_index_by_name[$rule["name"]]] = $new_branches;
}

$full_grammar = [
    "rules_offset" => $rules_offset,
    "rules_names" => $rules_ids,
    "grammar" => $compressed_grammar
];

// Emit a PHP file that returns the compressed grammar array.
$php_array = export_as_php_var($full_grammar);
echo "<?php\nreturn " . $php_array . ";";
Oops, something went wrong.