-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Add hocc syntax highlighting for kakoune. There's probably a way to avoid the near-complete duplication with the Hemlock syntax highlighting configuration, but it may involve shell evaluation of all highlighter additions. - Add `Os.argv`. Command line parameters may not be UTF-8-encoded, so it's not safe to use OCaml's `Sys.argv`. Finish implementing command line parameter validation Begin implementing `hocc` specification data structures Implement `hocc` scanner Revise `hocc` grammar, define corresponding syntax tree types Enable `hocc` syntax highlighting for the `.hmhi` extension Misc - Revise `hocc` grammar to use ε productions where appropriate. - Implement hmhi parsing. - Add `Hmc.Scan.cursor` as a convenience function. - Revise `hocc` grammar productions to use right recursion, and to support Hemlock code sequences well enough to pass muster for `hocc`'s purposes. Add `Parse.Error`, improve error reporting Add ctx type for parser functions Incorporate token malformations into parse error reporting Add `Hmc.Scan.AbstractToken.malformations` This convenience function extracts malformations, which centralizes the verbose exhaustive pattern matching necessary to do this reliably. 
Revise `hocc` command line usage to make parser generation optional Start implementing `.hmh` parsing Finish initial implementation of `hocc` parser (doesn't work correctly) Implement parse tree pretty printing, start implementing tracing Fix `hocc` parsing bugs (successfully parses `Example1.hmh[i]`) Add `hocc` parsing tests, fix grammar/implementation bugs Make production parameter lexical bindings optional Add more `hocc` parsing tests Add more `hocc` parser tests Add `Hocc` grammar parsing test Enhance `hocc` grammar to omit `-> ()` for unit-type non-terminals Add token alias syntax `hocc` documentation edits Expand `hocc` documentation Finish initial draft of `hocc.md` Add `hocc` parse error tests Add remaining `hocc` parser tests Add Lyken grammar spec (two forms, one of which uses DAG prec by construction) Remove `UIDENT` from `ProdParamType` Restrict precedence relationships to `<` with previously defined precedences Remove the `-dot`/`-graphviz` command line options, since cycles are no longer possible and the textual precedence relationships closely correspond to the graph. Add `Io` module Implement precedence validation/closure Incorporate `stderr` into `Io.t` and add `Io.fatal` Implement html precedence reporting Implement hocc report generation Make `hocc` scanner lazy Create the `hocc` report directory if needed Expose `Hmc.Scan.ConcretToken.t` definition to enable pattern matching Implement various modules on which `Spec` depends Implement more of `Spec.t` initialization Reduction initialization still isn't implemented, nor any reporting besides `prec`. 
Implement token/nonterm report generation Misc refactoring, start of reduction processing Combine duplicate code into `QualifiedType` Encapsulate `Reduction.param` as `Reduction.Param.t` Enhance first pass to gather qualified type in addition to index Implement `Reduction.Params` Refactor production parameter processing to be universally usable Implement reduction processing Synthesize `PSEUDO_END`, prep for start symbol wrapper synthesis Synthesize wrappers for start symbols Implement symbol first/follow set closure Implement `Lr[01]Item{,set}` Start implementing LR(1) item set closure Implement `Lr1ItemsetClosure.compat_ident` Implement `Lr1ItemsetClosure.compat_weak` Finish implementing `Lr1ItemsetClosure` Implement LR(1) item set closure generation Flesh out item set reporting, start refactoring to share implementation between txt and html reports Add `hocc_test` and integrate txt reports into test validation Unify txt/html report generation Implement unused prec/token/nonterm/prod reporting
- Loading branch information
Jason Evans
committed
Apr 29, 2022
1 parent
d96bbab
commit 6633246
Showing
137 changed files
with
11,642 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
open Basis | ||
|
||
(* Two-way tag with constructors [Left] and [Right]. *)
type t =
  | Left
  | Right

(* Format [t] as its constructor name ("Left" or "Right") by passing that name and [formatter] to
 * [Fmt.fmt]. *)
let pp t formatter =
  let name = match t with
    | Left -> "Left"
    | Right -> "Right"
  in
  Fmt.fmt name formatter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
open Basis | ||
|
||
(* Variant with two constructors, exposed concretely so that users of this interface can construct
 * and match on its values. *)
type t = | ||
| Left | ||
| Right | ||
|
||
(* Formatting interface for [t], as specified by [FormattableIntf.SMono]. *)
include FormattableIntf.SMono with type t := t |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
open! Basis | ||
include Basis.Rudiments | ||
|
||
(* Parser generation algorithm. *)
type algorithm =
  | LR1Compact
  | LR1Canonical

(* Format [algorithm] as its constructor name ("LR1Compact" or "LR1Canonical") by passing that name
 * and [formatter] to [Fmt.fmt]. *)
let pp_algorithm algorithm formatter =
  let name = match algorithm with
    | LR1Compact -> "LR1Compact"
    | LR1Canonical -> "LR1Canonical"
  in
  Fmt.fmt name formatter
|
||
(* Configuration record. One field per command line parameter; the [_opt] fields are [None] until
 * the corresponding parameter has been supplied. *)
type t = {
  verbose: bool;
  text: bool;
  html: bool;
  hocc: bool;
  algorithm: algorithm;
  hemlock: bool;
  ocaml: bool;
  srcdir_opt: Path.t option;
  module_opt: Path.Segment.t option;
  dstdir_opt: Path.t option;
}

(* Format every field of [t] as "{name=value; ...}", in declaration order. *)
let pp t formatter =
  (* Emit [label], then the value rendered by [pp_value]. *)
  let field label pp_value formatter = formatter |> Fmt.fmt label |> pp_value in
  formatter
  |> field "{verbose=" (Bool.pp t.verbose)
  |> field "; text=" (Bool.pp t.text)
  |> field "; html=" (Bool.pp t.html)
  |> field "; hocc=" (Bool.pp t.hocc)
  |> field "; algorithm=" (pp_algorithm t.algorithm)
  |> field "; hemlock=" (Bool.pp t.hemlock)
  |> field "; ocaml=" (Bool.pp t.ocaml)
  |> field "; srcdir_opt=" (Option.pp Path.pp t.srcdir_opt)
  |> field "; module_opt=" (Option.pp Path.Segment.pp t.module_opt)
  |> field "; dstdir_opt=" (Option.pp Path.pp t.dstdir_opt)
  |> Fmt.fmt "}"

(* Starting configuration: every boolean field false, compact LR(1) generation, and no
 * source/module/destination specified. *)
let default = {
  verbose=false;
  text=false;
  html=false;
  hocc=false;
  algorithm=LR1Compact;
  hemlock=false;
  ocaml=false;
  srcdir_opt=None;
  module_opt=None;
  dstdir_opt=None;
}
|
||
(* Print the command usage text and terminate the process. [error] selects both the output stream
 * and the exit status: [false] writes to stdout and exits with status 0, whereas [true] writes to
 * stderr and exits with status 1. This function never returns.
 *
 * The output path placeholders consistently use "<dstdir>" and "<module>", which are the names
 * defined by the `-d[stdir]` and `-s[rc]` parameter descriptions. *)
let usage error =
  let exit_code, formatter = match error with
    | false -> 0, File.Fmt.stdout
    | true -> 1, File.Fmt.stderr
  in
  formatter
  |> Fmt.fmt {|hocc usage: hocc <parameters>

Parameters:
             -h[elp] : Print command usage and exit.
          -v[erbose] : Print progress information during parser generation.
        -txt | -text : Write a detailed automaton description in plain text format
                       to "<dstdir>/hocc/<module>.txt".
               -html : Write a detailed automaton description in internally
                       hyperlinked HTML format to "<dstdir>/hocc/<module>.html".
        -hmh | -hocc : Write a complete grammar specification in hocc format to
                       "<dstdir>/hocc/<module>.hmh", but with all non-terminal
                       types and reduction code omitted.
        -c[anonical] : Generate a canonical LR(1) parser rather than a compact
                       LR(1) parser.
      -hm | -hemlock : Generate a Hemlock-based parser implementation and write it
                       to "<dstdir>/<module>.hm[i]".
        -ml | -ocaml : Generate an OCaml-based parser implementation and write it
                       to "<dstdir>/<module>.ml[i]". This is brittle
                       functionality intended only for Hemlock bootstrapping.
        -s[rc] <src> : Path and module name of input source, where inputs match
                       "<src>.hmh[i]" and "<src>" comprises the source directory
                       and module name, "[<srcdir>/]<module>".
  -d[stdir] <dstdir> : Path to directory in which to place generated output, such
                       that output file paths match "<dstdir>/[hocc/]<module>.*".
                       Defaults to "<srcdir>".
|}
  |> ignore;
  Stdlib.exit exit_code
|
||
(* Return true iff [segment] converts to a string (via [Path.Segment.to_string]) that has the shape
 * of a capitalized identifier: zero or more leading '_' codepoints, followed by one codepoint in
 * 'A'..'Z', followed by any number of codepoints drawn from 'A'..'Z', 'a'..'z', '0'..'9', '_', and
 * '\''. A segment for which [Path.Segment.to_string] returns [None] is rejected. *)
let is_segment_cident segment =
  let in_range first last cp =
    Codepoint.(>=) cp (Codepoint.of_char first) && Codepoint.(<=) cp (Codepoint.of_char last)
  in
  let is_char c cp = Codepoint.(=) cp (Codepoint.of_char c) in
  let is_upper cp = in_range 'A' 'Z' cp in
  let is_tail cp =
    is_upper cp || in_range 'a' 'z' cp || in_range '0' '9' cp || is_char '_' cp || is_char '\'' cp
  in
  (* Every codepoint in [cursor..past) must be a valid identifier tail codepoint. *)
  let rec tail_ok cursor past = begin
    match String.C.Cursor.(<) cursor past with
    | false -> true
    | true -> begin
        let cp, cursor' = String.C.Cursor.next cursor in
        match is_tail cp with
        | true -> tail_ok cursor' past
        | false -> false
      end
  end in
  (* Skip leading underscores, then require an uppercase codepoint before handing off to [tail_ok].
   * Exhausting the input here means that no uppercase codepoint was found. *)
  let rec head_ok cursor past = begin
    match String.C.Cursor.(<) cursor past with
    | false -> false
    | true -> begin
        let cp, cursor' = String.C.Cursor.next cursor in
        match is_char '_' cp, is_upper cp with
        | true, _ -> head_ok cursor' past
        | false, true -> tail_ok cursor' past
        | false, false -> false
      end
  end in
  match Path.Segment.to_string segment with
  | None -> false
  | Some s -> begin
      let slice = String.C.Slice.of_string s in
      head_ok (String.C.Slice.base slice) (String.C.Slice.past slice)
    end
|
||
(* Build a configuration from the command line [argv], starting from [default] and processing
 * parameters from index 1 onward. `-h[elp]` prints usage and exits successfully. Any invalid
 * parameter, missing operand, invalid `-s[rc]` operand, or absence of `-s[rc]` altogether causes a
 * diagnostic to be written to stderr followed by [usage true], which exits. *)
let of_argv argv =
  (* Write a diagnostic to stderr via [report], then print usage and exit with failure. *)
  let fail report = begin
    File.Fmt.stderr |> report |> ignore;
    usage true
  end in
  let n_args = Array.length argv in
  (* Fetch the operand that follows the parameter at index [i]. *)
  let operand i = begin
    let i_operand = succ i in
    match i_operand < n_args with
    | true -> Array.get i_operand argv
    | false -> begin
        let param = Bytes.to_string_replace (Array.get i argv) in
        fail (fun formatter ->
          formatter |> Fmt.fmt "hocc: " |> Fmt.fmt param |> Fmt.fmt " argument missing\n")
      end
  end in
  (* Split a `-s[rc]` operand into an optional source directory and a validated module name. *)
  let split_src src_bytes = begin
    let path = Path.of_bytes (Bytes.Slice.init src_bytes) in
    let dirname, basename_opt = Path.split path in
    let srcdir_opt = match Path.is_empty dirname with
      | true -> None
      | false -> Some dirname
    in
    let module_opt = match basename_opt with
      | Some segment -> begin
          match is_segment_cident segment with
          | true -> Some segment
          | false ->
            fail (fun formatter ->
              formatter
              |> Fmt.fmt "hocc: Invalid source module name: "
              |> Path.Segment.pp segment
              |> Fmt.fmt "\n")
        end
      | None ->
        fail (fun formatter ->
          formatter |> Fmt.fmt "hocc: Invalid source: " |> Path.pp path |> Fmt.fmt "\n")
    in
    srcdir_opt, module_opt
  end in
  (* Consume parameters left to right, threading the accumulated configuration. *)
  let rec parse conf i = begin
    match i < n_args with
    | false -> conf
    | true -> begin
        let arg = Bytes.to_string_replace (Array.get i argv) in
        match arg with
        | "-h" | "-help" -> usage false
        | "-v" | "-verbose" -> parse {conf with verbose=true} (succ i)
        | "-txt" | "-text" -> parse {conf with text=true} (succ i)
        | "-html" -> parse {conf with html=true} (succ i)
        | "-hmh" | "-hocc" -> parse {conf with hocc=true} (succ i)
        | "-c" | "-canonical" -> parse {conf with algorithm=LR1Canonical} (succ i)
        | "-hm" | "-hemlock" -> parse {conf with hemlock=true} (succ i)
        | "-ml" | "-ocaml" -> parse {conf with ocaml=true} (succ i)
        | "-s" | "-src" -> begin
            let srcdir_opt, module_opt = split_src (operand i) in
            parse {conf with srcdir_opt; module_opt} (i + 2L)
          end
        | "-d" | "-dstdir" -> begin
            let dstdir = Path.of_bytes (Bytes.Slice.init (operand i)) in
            parse {conf with dstdir_opt=Some dstdir} (i + 2L)
          end
        | _ ->
          fail (fun formatter ->
            formatter
            |> Fmt.fmt "hocc: Invalid command line parameter: "
            |> String.pp arg
            |> Fmt.fmt "\n")
      end
  end in
  (* A source module is mandatory. *)
  match parse default 1L with
  | {module_opt=Some _; _} as conf -> conf
  | {module_opt=None; _} ->
    fail (fun formatter -> formatter |> Fmt.fmt "hocc: Source unspecified\n")
|
||
(* Value of the [verbose] field (set by `-v[erbose]`). *)
let verbose t =
  t.verbose

(* Value of the [text] field (set by `-txt`/`-text`). *)
let text t =
  t.text

(* Value of the [html] field (set by `-html`). *)
let html t =
  t.html

(* Value of the [hocc] field (set by `-hmh`/`-hocc`). *)
let hocc t =
  t.hocc

(* Selected parser generation algorithm. *)
let algorithm t =
  t.algorithm

(* Value of the [hemlock] field (set by `-hm`/`-hemlock`). *)
let hemlock t =
  t.hemlock

(* Value of the [ocaml] field (set by `-ml`/`-ocaml`). *)
let ocaml t =
  t.ocaml

(* Source directory, or "." if the source was specified without a directory component. *)
let srcdir t =
  match t.srcdir_opt with
  | Some srcdir -> srcdir
  | None -> Path.of_string "."

(* Source module name. [of_argv] only returns configurations that have a module, hence the [None]
 * case is treated as unreachable. *)
let module_ t =
  match t.module_opt with
  | Some m -> m
  | None -> not_reached ()
|
||
(* Destination directory for generated output. An explicitly specified destination wins; otherwise
 * the source directory is used, as documented by the usage text ("Defaults to "<srcdir>""); "." is
 * the result only if neither directory was specified. *)
let dstdir {dstdir_opt; srcdir_opt; _} =
  match dstdir_opt, srcdir_opt with
  | Some dstdir, _ -> dstdir
  | None, Some srcdir -> srcdir
  | None, None -> Path.of_string "."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
open Basis | ||
|
||
(* Parser generation algorithm. [LR1Compact] is the default; [LR1Canonical] is selected by the
 * `-c[anonical]` command line parameter. *)
type algorithm = | ||
| LR1Compact | ||
| LR1Canonical | ||
|
||
(* Format an algorithm as its constructor name. *)
val pp_algorithm: algorithm -> (module Fmt.Formatter) -> (module Fmt.Formatter) | ||
|
||
(* Command line configuration. *)
type t | ||
|
||
include FormattableIntf.SMono with type t := t | ||
|
||
(* Parse command line arguments (element 0 is skipped) into a configuration. Invalid input, or the
 * absence of a `-s[rc]` parameter, results in a diagnostic on stderr followed by usage output and
 * process exit; `-h[elp]` prints usage and exits successfully. *)
val of_argv: Bytes.t array -> t | ||
|
||
(* Accessors. The boolean accessors are true iff the corresponding parameter was specified. *)
val verbose: t -> bool | ||
val text: t -> bool | ||
val html: t -> bool | ||
val hocc: t -> bool | ||
val algorithm: t -> algorithm | ||
val hemlock: t -> bool | ||
val ocaml: t -> bool | ||
(* Directory component of the `-s[rc]` operand, or "." if it has none. *)
val srcdir: t -> Path.t | ||
(* Module name component of the `-s[rc]` operand. *)
val module_: t -> Path.Segment.t | ||
(* Directory in which generated output is placed (see the `-d[stdir]` parameter). *)
val dstdir: t -> Path.t |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
; Build the `hocc` executable, which depends on the Basis and Hmc libraries.
(executables | ||
(names hocc) | ||
(libraries Basis Hmc)) | ||
|
||
; Install `hocc.exe` in the `bin` section under the name `hocc`.
(install | ||
(section bin) | ||
(files (hocc.exe as hocc))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
open Basis | ||
open! Basis.Rudiments | ||
|
||
(* Write a "hocc: Parsing <path>" line for [text] to the log of [io] and return the io that results
 * from [Io.with_log]. [Option.value_hlt] is applied to [Text.path text], i.e. [text] is expected to
 * have a path. *)
let log_parsing (Io.{log; _} as io) text =
  log
  |> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path text)) |> Fmt.fmt "\n"
  |> Io.with_log io

(* Write [errors] to stderr, sorted according to [Parse.Error.cmp], then exit with status 1. Never
 * returns. *)
let report_errors_and_exit errors =
  List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error ->
    File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore
  );
  Stdlib.exit 1

(* Parse the optional interface text carried by [io]. Returns the (possibly updated) io paired with
 * [Some] parse result, or with [None] if there is no interface text. Parse errors are reported on
 * stderr and terminate the process. *)
let parse_hmhi (Io.{hmhi; _} as io) =
  match hmhi with
  | None -> io, None
  | Some text -> begin
      let scanner = Scan.init text in
      let io = log_parsing io text in
      let _scanner', hmhi = Parse.hmhi scanner in
      match hmhi with
      | Ok hmhi -> io, Some hmhi
      | Error errors -> report_errors_and_exit errors
    end

(* Parse the grammar text carried by [io]. Returns the updated io paired with the parse result.
 * Parse errors are reported on stderr and terminate the process. *)
let parse_hmh (Io.{hmh; _} as io) =
  let scanner = Scan.init hmh in
  let io = log_parsing io hmh in
  let _scanner', hmh = Parse.hmh scanner in
  match hmh with
  | Ok hmh -> io, hmh
  | Error errors -> report_errors_and_exit errors
|
||
(* Program entry point: parse the command line, parse the input(s), build the specification, emit
 * whichever of the text/html/hocc outputs were requested (in that order), and finalize io. *)
let _ =
  let conf = Conf.of_argv Os.argv in
  (* Apply [emit] to [io] only if [enabled]; otherwise pass [io] through unchanged. *)
  let emit_if enabled emit io = match enabled with
    | true -> emit io
    | false -> io
  in
  let io = Io.init conf in
  let io, _hmhi_opt = parse_hmhi io in
  let io, hmh = parse_hmh io in
  let io, spec = Spec.init conf io hmh in
  let io = emit_if (Conf.text conf) (fun io -> Spec.to_txt conf io spec) io in
  let io = emit_if (Conf.html conf) (fun io -> Spec.to_html conf io spec) io in
  let io = emit_if (Conf.hocc conf) (fun io -> Spec.to_hocc io spec) io in
  let _io = Io.fini conf io in
  ()
Oops, something went wrong.