diff --git a/.editorconfig b/.editorconfig index 61a49e703..ab94a347d 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ # https://editorconfig.org/ -[*.{hm,hmi}] +[*.{hm,hmi,hmh,hmhi}] indent_style = space indent_size = 4 tab_width = 8 diff --git a/bootstrap/bin/dune b/bootstrap/bin/hmc/dune similarity index 100% rename from bootstrap/bin/dune rename to bootstrap/bin/hmc/dune diff --git a/bootstrap/bin/hmc.ml b/bootstrap/bin/hmc/hmc.ml similarity index 89% rename from bootstrap/bin/hmc.ml rename to bootstrap/bin/hmc/hmc.ml index 3a04732dd..b61fb2aca 100644 --- a/bootstrap/bin/hmc.ml +++ b/bootstrap/bin/hmc/hmc.ml @@ -36,10 +36,9 @@ let scan_file path = () let _ = - match Array.length Sys.argv with + match Array.length Os.argv with | 0L | 1L -> halt "hmc usage: hmc " | _ -> begin - let path_str = Array.get 1L Sys.argv in - let path = Path.of_string path_str in + let path = Path.of_bytes (Bytes.Slice.init (Array.get 1L Os.argv)) in scan_file path end diff --git a/bootstrap/bin/hocc/assoc.ml b/bootstrap/bin/hocc/assoc.ml new file mode 100644 index 000000000..60badce54 --- /dev/null +++ b/bootstrap/bin/hocc/assoc.ml @@ -0,0 +1,11 @@ +open Basis + +type t = + | Left + | Right + +let pp t formatter = + formatter |> Fmt.fmt (match t with + | Left -> "Left" + | Right -> "Right" + ) diff --git a/bootstrap/bin/hocc/assoc.mli b/bootstrap/bin/hocc/assoc.mli new file mode 100644 index 000000000..3884790d4 --- /dev/null +++ b/bootstrap/bin/hocc/assoc.mli @@ -0,0 +1,7 @@ +open Basis + +type t = + | Left + | Right + +include FormattableIntf.SMono with type t := t diff --git a/bootstrap/bin/hocc/conf.ml b/bootstrap/bin/hocc/conf.ml new file mode 100644 index 000000000..65e4da51d --- /dev/null +++ b/bootstrap/bin/hocc/conf.ml @@ -0,0 +1,236 @@ +open! Basis +include Basis.Rudiments + +type algorithm = + | LR1Compact + | LR1Canonical + +let pp_algorithm algorithm formatter = + formatter |> Fmt.fmt (match algorithm with + | LR1Compact -> "LR1Compact" + | LR1Canonical -> "LR1Canonical" + ) + +type t = { + verbose: bool; + text: bool; + html: bool; + hocc: bool; + algorithm: algorithm; + hemlock: bool; + ocaml: bool; + srcdir_opt: Path.t option; + module_opt: Path.Segment.t option; + dstdir_opt: Path.t option; +} + +let pp {verbose; text; html; hocc; algorithm; hemlock; ocaml; srcdir_opt; module_opt; dstdir_opt} + formatter = + formatter + |> Fmt.fmt "{verbose=" |> Bool.pp verbose + |> Fmt.fmt "; text=" |> Bool.pp text + |> Fmt.fmt "; html=" |> Bool.pp html + |> Fmt.fmt "; hocc=" |> Bool.pp hocc + |> Fmt.fmt "; algorithm=" |> pp_algorithm algorithm + |> Fmt.fmt "; hemlock=" |> Bool.pp hemlock + |> Fmt.fmt "; ocaml=" |> Bool.pp ocaml + |> Fmt.fmt "; srcdir_opt=" |> (Option.pp Path.pp) srcdir_opt + |> Fmt.fmt "; module_opt=" |> (Option.pp Path.Segment.pp) module_opt + |> Fmt.fmt "; dstdir_opt=" |> (Option.pp Path.pp) dstdir_opt + |> Fmt.fmt "}" + +let default = { + verbose=false; + text=false; + html=false; + hocc=false; + algorithm=LR1Compact; + hemlock=false; + ocaml=false; + srcdir_opt=None; + module_opt=None; + dstdir_opt=None; +} + +let usage error = + let exit_code, formatter = match error with + | false -> 0, File.Fmt.stdout + | true -> 1, File.Fmt.stderr + in + formatter + |> Fmt.fmt {|hocc usage: hocc + +Options: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text format + to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -c[anonical] : Generate a canonical LR(1) parser rather than a compact + LR(1) parser. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write it + to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". +-d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". +|} + |> ignore; + Stdlib.exit exit_code + +let is_segment_cident segment = + let rec cont cursor past = begin + match String.C.Cursor.(<) cursor past with + | false -> true + | true -> begin + let cp, cursor' = String.C.Cursor.next cursor in + match cp with + | cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past + | cp when Codepoint.(cp >= of_char 'a' && cp <= of_char 'z') -> cont cursor' past + | cp when Codepoint.(cp >= of_char '0' && cp <= of_char '9') -> cont cursor' past + | cp when Codepoint.(cp = of_char '_') -> cont cursor' past + | cp when Codepoint.(cp = of_char '\'') -> cont cursor' past + | _ -> false + end + end in + let rec start cursor past = begin + match String.C.Cursor.(<) cursor past with + | false -> false + | true -> begin + let cp, cursor' = String.C.Cursor.next cursor in + match cp with + | cp when Codepoint.(cp = of_char '_') -> start cursor' past + | cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past + | _ -> false + end + end in + match Path.Segment.to_string segment with + | None -> false + | Some s -> begin + let sslice = String.C.Slice.of_string s in + let base = String.C.Slice.base sslice in + let past = String.C.Slice.past sslice in + start base past + end + +let of_argv argv = + let arg_arg argv i = begin + let i' = succ i in + match i' < Array.length argv with + | false -> begin + let arg = Bytes.to_string_replace (Array.get i argv) in + File.Fmt.stderr |> Fmt.fmt "hocc: " |> Fmt.fmt arg |> Fmt.fmt " argument missing\n" + |> ignore; + usage true + end + | true -> Array.get i' argv + end in + let rec f t argv i = begin + match i < Array.length argv with + | false -> t + | true -> begin + let arg_bytes = Array.get i argv in + let arg_string = Bytes.to_string_replace arg_bytes in + match arg_string with + | "-help" | "-h" -> usage false + | "-verbose" | "-v" -> f {t with verbose=true} argv (succ i) + | "-txt" | "-text" -> f {t with text=true} argv (succ i) + | "-html" -> f {t with html=true} argv (succ i) + | "-hmh" | "-hocc" -> f {t with hocc=true} argv (succ i) + | "-canonical" | "-c" -> f {t with algorithm=LR1Canonical} argv (succ i) + | "-hm" | "-hemlock" -> f {t with hemlock=true} argv (succ i) + | "-ml" | "-ocaml" -> f {t with ocaml=true} argv (succ i) + | "-src" | "-s" -> begin + let path = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in + let dirname, basename_opt = Path.split path in + let srcdir_opt = match Path.is_empty dirname with + | true -> None + | false -> Some dirname + in + let module_opt = match basename_opt with + | None -> begin + File.Fmt.stderr + |> Fmt.fmt "hocc: Invalid source: " + |> Path.pp path + |> Fmt.fmt "\n" + |> ignore; + usage true + end + | Some segment -> begin + match is_segment_cident segment with + | false -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Invalid source module name: " + |> Path.Segment.pp segment |> Fmt.fmt "\n" |> ignore; + usage true + end + | true -> Some segment + end + in + f {t with srcdir_opt; module_opt} argv (i + 2L) + end + | "-dstdir" | "-d" -> begin + let dstdir = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in + f {t with dstdir_opt=Some dstdir} argv (i + 2L) + end + | _ -> begin + File.Fmt.stderr + |> Fmt.fmt "hocc: Invalid command line parameter: " + |> String.pp arg_string + |> Fmt.fmt "\n" + |> ignore; + usage true + end + end + end in + let t = f default argv 1L in + match t.module_opt with + | None -> begin + File.Fmt.stderr |> Fmt.fmt "hocc: Source unspecified\n" |> ignore; + usage true + end + | Some _ -> t + +let verbose {verbose; _} = + verbose + +let text {text; _} = + text + +let html {html; _} = + html + +let hocc {hocc; _} = + hocc + +let algorithm {algorithm; _} = + algorithm + +let hemlock {hemlock; _} = + hemlock + +let ocaml {ocaml; _} = + ocaml + +let srcdir {srcdir_opt; _} = + match srcdir_opt with + | None -> Path.of_string "." + | Some srcdir -> srcdir + +let module_ {module_opt; _} = + match module_opt with + | None -> not_reached () + | Some m -> m + +let dstdir {dstdir_opt; _} = + match dstdir_opt with + | None -> Path.of_string "." + | Some dstdir -> dstdir diff --git a/bootstrap/bin/hocc/conf.mli b/bootstrap/bin/hocc/conf.mli new file mode 100644 index 000000000..bc15f1f03 --- /dev/null +++ b/bootstrap/bin/hocc/conf.mli @@ -0,0 +1,24 @@ +open Basis + +type algorithm = + | LR1Compact + | LR1Canonical + +val pp_algorithm: algorithm -> (module Fmt.Formatter) -> (module Fmt.Formatter) + +type t + +include FormattableIntf.SMono with type t := t + +val of_argv: Bytes.t array -> t + +val verbose: t -> bool +val text: t -> bool +val html: t -> bool +val hocc: t -> bool +val algorithm: t -> algorithm +val hemlock: t -> bool +val ocaml: t -> bool +val srcdir: t -> Path.t +val module_: t -> Path.Segment.t +val dstdir: t -> Path.t diff --git a/bootstrap/bin/hocc/dune b/bootstrap/bin/hocc/dune new file mode 100644 index 000000000..15f70f33e --- /dev/null +++ b/bootstrap/bin/hocc/dune @@ -0,0 +1,7 @@ +(executables + (names hocc) + (libraries Basis Hmc)) + +(install + (section bin) + (files (hocc.exe as hocc))) diff --git a/bootstrap/bin/hocc/hocc.ml b/bootstrap/bin/hocc/hocc.ml new file mode 100644 index 000000000..c2fde3423 --- /dev/null +++ b/bootstrap/bin/hocc/hocc.ml @@ -0,0 +1,59 @@ +open Basis +open! Basis.Rudiments + +let parse_hmhi (Io.{hmhi; _} as io) = + match hmhi with + | Some text -> begin + let scanner = Scan.init text in + let io = + io.log + |> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path text)) |> Fmt.fmt "\n" + |> Io.with_log io in + let _scanner', hmhi = Parse.hmhi scanner in + match hmhi with + | Error errors -> begin + List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error -> + File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore + ); + Stdlib.exit 1 + end + | Ok hmhi -> io, Some hmhi + end + | None -> io, None + +let parse_hmh (Io.{hmh; _} as io) = + let scanner = Scan.init hmh in + let io = + io.log + |> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path hmh)) |> Fmt.fmt "\n" + |> Io.with_log io in + let _scanner', hmh = Parse.hmh scanner in + match hmh with + | Error errors -> begin + List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error -> + File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore + ); + Stdlib.exit 1 + end + | Ok hmh -> io, hmh + +let _ = + let conf = Conf.of_argv Os.argv in + let io = Io.init conf in + let io, _hmhi_opt = parse_hmhi io in + let io, hmh = parse_hmh io in + let io, spec = Spec.init io hmh in + let io = match Conf.text conf with + | false -> io + | true -> Spec.to_txt conf io spec + in + let io = match Conf.html conf with + | false -> io + | true -> Spec.to_html conf io spec + in + let io = match Conf.hocc conf with + | false -> io + | true -> Spec.to_hocc io spec + in + let _io = Io.fini conf io in + () diff --git a/bootstrap/bin/hocc/io.ml b/bootstrap/bin/hocc/io.ml new file mode 100644 index 000000000..227af7987 --- /dev/null +++ b/bootstrap/bin/hocc/io.ml @@ -0,0 +1,179 @@ +open Basis +include Basis.Rudiments + +type t = { + err: (module Fmt.Formatter); + hmhi: Text.t option; + hmh: Text.t; + log: (module Fmt.Formatter); + txt: (module Fmt.Formatter); + html: (module Fmt.Formatter); + hocc: (module Fmt.Formatter); + hmi: (module Fmt.Formatter); + hm: (module Fmt.Formatter); + mli: (module Fmt.Formatter); + ml: (module Fmt.Formatter); +} + +let init_err _conf = + File.Fmt.stderr + +let path_with_suffix ?(is_report=false) conf suffix = + Path.join [ + (Conf.srcdir conf); + Path.of_string (match is_report with false -> "" | true -> "hocc"); + Path.of_segment (Path.Segment.join [ + (Conf.module_ conf); + Option.value_hlt Path.(basename (of_string suffix)) + ]); + ] + +let open_infile_as_text path = + match File.of_path path with + | Ok f -> begin + let stream = File.Stream.of_file f in + let text = Text.of_bytes_stream ~path stream in + Ok text + end + | Error _ as error -> error + +let init_hmhi conf = + let path = path_with_suffix conf ".hmhi" in + match open_infile_as_text path with + | Ok text -> Some text + | Error _ -> None + +let open_error ~err path error = + let _err = + err + |> Fmt.fmt "hocc: File.of_path " |> Path.pp path |> Fmt.fmt ": " + |> Fmt.fmt (Errno.to_string error) + |> Fmt.fmt "\n" + in + Stdlib.exit 1 + +let init_hmh conf ~err = + let path = path_with_suffix conf ".hmh" in + match open_infile_as_text path with + | Ok text -> text + | Error error -> open_error ~err path error + +let init_log conf = + match Conf.verbose conf with + | false -> File.Fmt.sink + | true -> File.Fmt.stdout + +let init_txt conf = + match Conf.text conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_html conf = + match Conf.html conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_hocc conf = + match Conf.hocc conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_hmi conf hmhi = + match Conf.hemlock conf, hmhi with + | false, _ + | _, None -> File.Fmt.sink + | true, Some _ -> String.Fmt.empty + +let init_hm conf = + match Conf.hemlock conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init_mli conf hmhi = + match Conf.ocaml conf, hmhi with + | false, _ + | _, None -> File.Fmt.sink + | true, Some _ -> String.Fmt.empty + +let init_ml conf = + match Conf.ocaml conf with + | false -> File.Fmt.sink + | true -> String.Fmt.empty + +let init conf = + let err = init_err conf in + let hmhi = init_hmhi conf in + let hmh = init_hmh conf ~err in + let log = init_log conf in + let txt = init_txt conf in + let html = init_html conf in + let hocc = init_hocc conf in + let hmi = init_hmi conf hmhi in + let hm = init_hm conf in + let mli = init_mli conf hmhi in + let ml = init_ml conf in + + {err; hmhi; hmh; log; txt; html; hocc; hmi; hm; mli; ml} + +let open_outfile_as_formatter ~is_report ~err path = + let _ = match is_report with + | false -> () + | true -> Os.mkdirat (Path.dirname path) |> ignore + in + match File.of_path ~flag:File.Flag.W path with + | Ok f -> File.Fmt.of_t f + | Error error -> open_error ~err path error + +let fini_formatter ?(is_report=false) conf ~err ~log formatter suffix = + match Fmt.sync formatter with + | To_string s -> begin + let path = path_with_suffix ~is_report conf suffix in + let log' = log |> Fmt.fmt "hocc: Writing " |> Path.pp path |> Fmt.fmt "\n" in + let formatter = open_outfile_as_formatter ~is_report ~err path in + let formatter' = formatter |> Fmt.fmt s |> Fmt.flush in + log', formatter' + end + | Synced formatter' -> log, formatter' + +let fini conf ({err; log; txt; html; hocc; hmi; hm; mli; ml; _} as t) = + let log, txt = fini_formatter ~is_report:true conf ~err ~log txt ".txt" in + let log, html = fini_formatter ~is_report:true conf ~err ~log html ".html" in + let log, hocc = fini_formatter ~is_report:true conf ~err ~log hocc ".hmh" in + let log, hmi = fini_formatter conf ~err ~log hmi ".hmi" in + let log, hm = fini_formatter conf ~err ~log hm ".hm" in + let log, mli = fini_formatter conf ~err ~log mli ".mli" in + let log, ml = fini_formatter conf ~err ~log ml ".ml" in + let log = Fmt.flush log in + {t with log; txt; html; hocc; hmi; hm; mli; ml} + +let fatal {err; _} = + let _err = Fmt.flush err in + Stdlib.exit 1 + +let with_log t log = + let log = Fmt.flush log in + {t with log} + +let with_err t err = + {t with err} + +let with_txt t txt = + {t with txt} + +let with_html t html = + {t with html} + +let with_hocc t hocc = + {t with hocc} + +let with_hmi t hmi = + {t with hmi} + +let with_hm t hm = + {t with hm} + +let with_mli t mli = + {t with mli} + +let with_ml t ml = + {t with ml} diff --git a/bootstrap/bin/hocc/io.mli b/bootstrap/bin/hocc/io.mli new file mode 100644 index 000000000..14866abd7 --- /dev/null +++ b/bootstrap/bin/hocc/io.mli @@ -0,0 +1,29 @@ +open Basis + +type t = { + err: (module Fmt.Formatter); + hmhi: Text.t option; + hmh: Text.t; + log: (module Fmt.Formatter); + txt: (module Fmt.Formatter); + html: (module Fmt.Formatter); + hocc: (module Fmt.Formatter); + hmi: (module Fmt.Formatter); + hm: (module Fmt.Formatter); + mli: (module Fmt.Formatter); + ml: (module Fmt.Formatter); +} + +val init: Conf.t -> t +val fini: Conf.t -> t -> t +val fatal: t -> 'a + +val with_err: t -> (module Fmt.Formatter) -> t +val with_log: t -> (module Fmt.Formatter) -> t +val with_txt: t -> (module Fmt.Formatter) -> t +val with_html: t -> (module Fmt.Formatter) -> t +val with_hocc: t -> (module Fmt.Formatter) -> t +val with_hmi: t -> (module Fmt.Formatter) -> t +val with_hm: t -> (module Fmt.Formatter) -> t +val with_mli: t -> (module Fmt.Formatter) -> t +val with_ml: t -> (module Fmt.Formatter) -> t diff --git a/bootstrap/bin/hocc/parse.ml b/bootstrap/bin/hocc/parse.ml new file mode 100644 index 000000000..ba5e07ac1 --- /dev/null +++ b/bootstrap/bin/hocc/parse.ml @@ -0,0 +1,1638 @@ +open Basis +open! Basis.Rudiments + +module Error = struct + module T = struct + type t = { + source: Hmc.Source.Slice.t; + msg: string; + } + + let cmp t0 t1 = + Hmc.Source.Slice.cmp t0.source t1.source + + let pp {source; msg} formatter = + formatter + |> Fmt.fmt "{source=" |> Hmc.Source.Slice.pp source + |> Fmt.fmt "; msg=" |> String.pp msg + |> Fmt.fmt "}" + + let fmt ?(alt=false) ({source; msg} as t) formatter = + match alt with + | false -> pp t formatter + | true -> begin + formatter + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp source + |> Fmt.fmt ": " + |> Fmt.fmt msg + |> Fmt.fmt "\n" + end + end + include T + include Cmpable.Make(T) + + let init_token token msg = + {source=Scan.Token.source token; msg} + + let init_mal mal = + let open Hmc.Scan.AbstractToken.Rendition.Malformation in + {source=source mal; msg=description mal} + + let init_scanner scanner msg = + let cursor = Scan.cursor scanner in + let source = Hmc.Source.Slice.of_cursors ~base:cursor ~past:cursor in + {source; msg} +end + +type uident = + | Uident of {uident: Scan.Token.t} +and cident = + | Cident of {cident: Scan.Token.t} +and ident = + | IdentUident of {uident: uident} + | IdentCident of {cident: cident} + | IdentUscore of {uscore: Scan.Token.t} +and precs_tl = + | PrecsTlCommaUident of {comma: Scan.Token.t; uident: uident; precs_tl: precs_tl} + | PrecsTlEpsilon +and precs = + | Precs of {uident: uident; precs_tl: precs_tl} +and prec_rels = + | PrecRelsLtPrecs of {lt: Scan.Token.t; precs: precs} + | PrecRelsEpsilon +and prec_type = + | PrecTypePrec of {prec: Scan.Token.t} + | PrecTypeLeft of {left: Scan.Token.t} + | PrecTypeRight of {right: Scan.Token.t} +and prec = + | Prec of {prec_type: prec_type; uident: uident; prec_rels: prec_rels} +and of_type = + | OfType of {of_: Scan.Token.t; type_module: cident; dot: Scan.Token.t; type_type: uident} +and of_type0 = + | OfType0OfType of {of_type: of_type} + | OfType0Epsilon +and prec_ref = + | PrecRefPrecUident of {prec: Scan.Token.t; uident: uident} + | PrecRefEpsilon +and token_alias = + | TokenAlias of {alias: Scan.Token.t} + | TokenAliasEpsilon +and token = + | Token of {token: Scan.Token.t; cident: cident; token_alias: token_alias; of_type0: of_type0; + prec_ref: prec_ref} +and sep = + | SepLineDelim of {line_delim: Scan.Token.t} + | SepSemi of {semi: Scan.Token.t} + | SepBar of {bar: Scan.Token.t} +and codes_tl = + | CodesTlSepCode of {sep: sep; code: code; codes_tl: codes_tl} + | CodesTlEpsilon +and codes = + | Codes of {code: code; codes_tl: codes_tl} +and codes0 = + | Codes0Codes of {codes: codes} + | Codes0Epsilon +and delimited = + | DelimitedBlock of {indent: Scan.Token.t; codes: codes; dedent: Scan.Token.t} + | DelimitedParen of {lparen: Scan.Token.t; codes0: codes0; rparen: Scan.Token.t} + | DelimitedCapture of {lcapture: Scan.Token.t; codes0: codes0; rcapture: Scan.Token.t} + | DelimitedList of {lbrack: Scan.Token.t; codes0: codes0; rbrack: Scan.Token.t} + | DelimitedArray of {larray: Scan.Token.t; codes0: codes0; rarray: Scan.Token.t} + | DelimitedModule of {lcurly: Scan.Token.t; codes0: codes0; rcurly: Scan.Token.t} +and code_tl = + | CodeTlDelimited of {delimited: delimited; code_tl: code_tl} + | CodeTlToken of {token: Scan.Token.t; code_tl: code_tl} + | CodeTlEpsilon +and code = + | CodeDelimited of {delimited: delimited; code_tl: code_tl} + | CodeToken of {token: Scan.Token.t; code_tl: code_tl} +and prod_param_type = + | ProdParamTypeCident of {cident: cident} + | ProdParamTypeAlias of {alias: Scan.Token.t} +and prod_param_ident = + | ProdParamIdentIdentColon of {ident: ident; colon: Scan.Token.t} + | ProdParamIdentEpsilon +and prod_param = + | ProdParam of {prod_param_ident: prod_param_ident; prod_param_type: prod_param_type} +and prod_params_tl = + | ProdParamsTlProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} + | ProdParamsTlEpsilon +and prod_params = + | ProdParamsProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} +and prod_pattern = + | ProdPatternParams of {prod_params: prod_params} + | ProdPatternEpsilon of {epsilon: Scan.Token.t} +and prod = + | Prod of {prod_pattern: prod_pattern; prec_ref: prec_ref} +and prods_tl = + | ProdsTlBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} + | ProdsTlEpsilon +and prods = + | ProdsBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} + | ProdsProd of {prod: prod; prods_tl: prods_tl} +and reduction = + | Reduction of {prods: prods; arrow: Scan.Token.t; code: code} +and reductions_tl = + | ReductionsTlBarReduction of {bar: Scan.Token.t; reduction: reduction; + reductions_tl: reductions_tl} + | ReductionsTlEpsilon +and reductions = + | ReductionsBarReduction of {bar: Scan.Token.t; reduction: reduction; + reductions_tl: reductions_tl} + | ReductionsReduction of {reduction: reduction; reductions_tl: reductions_tl} +and nonterm_type = + | NontermTypeNonterm of {nonterm: Scan.Token.t} + | NontermTypeStart of {start: Scan.Token.t} +and nonterm = + | NontermReductions of {nonterm_type: nonterm_type; cident: cident; of_type: of_type; + prec_ref: prec_ref; cce: Scan.Token.t; reductions: reductions} + | NontermProds of {nonterm_type: nonterm_type; cident: cident; prec_ref: prec_ref; + cce: Scan.Token.t; prods: prods} +and stmt = + | StmtPrec of {prec: prec} + | StmtToken of {token: token} + | StmtNonterm of {nonterm: nonterm} + | StmtCode of {code: code} +and stmts_tl = + | StmtsTl of {line_delim: Scan.Token.t; stmt: stmt; stmts_tl: stmts_tl} + | StmtsTlEpsilon +and stmts = + | Stmts of {stmt: stmt; stmts_tl: stmts_tl} +and hocc = + | Hocc of {hocc: Scan.Token.t; indent: Scan.Token.t; stmts: stmts; dedent: Scan.Token.t} +and eoi = + | Eoi of {eoi: Scan.Token.t} +and matter = + | Matter of {token: Scan.Token.t; matter: matter} + | MatterEpsilon +and hmh = + | Hmh of {prelude: matter; hocc: hocc; postlude: matter; eoi: eoi} +and hmhi = + | Hmhi of {prelude: matter; hocc: Scan.Token.t; postlude: matter; eoi: eoi} + +let fmt_lcurly ~alt ~width formatter = + match alt with + | false -> formatter |> Fmt.fmt "{" + | true -> + formatter + |> Fmt.fmt "{\n" + |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) "" + +let fmt_semi ~alt ~width formatter = + match alt with + | false -> formatter |> Fmt.fmt "; " + | true -> + formatter + |> Fmt.fmt "\n" + |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) "" + +let fmt_rcurly ~alt ~width formatter = + match alt with + | false -> formatter |> Fmt.fmt "}" + | true -> + formatter + |> Fmt.fmt "\n" + |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 2L) "" + |> Fmt.fmt "}" + +let rec fmt_uident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) uident formatter = + match uident with + | Uident {uident} -> + formatter + |> Fmt.fmt "Uident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "uident=" |> Scan.Token.pp uident + |> fmt_rcurly ~alt ~width +and pp_uident uident formatter = + fmt_uident uident formatter + +let rec fmt_cident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) cident formatter = + match cident with + | Cident {cident} -> + formatter + |> Fmt.fmt "Cident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "cident=" |> Scan.Token.pp cident + |> fmt_rcurly ~alt ~width +and pp_cident cident formatter = + fmt_cident cident formatter + +and fmt_ident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) ident formatter = + match ident with + | IdentUident {uident} -> + formatter |> Fmt.fmt "IdentUident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "uident=" |> pp_uident uident + |> fmt_rcurly ~alt ~width + | IdentCident {cident} -> + formatter |> Fmt.fmt "IdentCident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "cident=" |> pp_cident cident + |> fmt_rcurly ~alt ~width + | IdentUscore {uscore} -> + formatter |> Fmt.fmt "IdentUscore " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "uscore=" |> Scan.Token.pp uscore + |> fmt_rcurly ~alt ~width +and pp_ident ident formatter = + fmt_ident ident formatter + +and fmt_precs_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs_tl + formatter = + let width' = width + 4L in + match precs_tl with + | PrecsTlCommaUident {comma; uident; precs_tl} -> + formatter |> Fmt.fmt "PrecsTlCommaUident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "comma=" |> Scan.Token.pp comma + |> fmt_semi ~alt ~width + |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident + |> fmt_semi ~alt ~width + |> Fmt.fmt "precs_tl=" |> fmt_precs_tl ~alt ~width:width' precs_tl + |> fmt_rcurly ~alt ~width + | PrecsTlEpsilon -> + formatter |> Fmt.fmt "PrecsTlEpsilon" +and pp_precs_tl precs_tl formatter = + fmt_precs_tl precs_tl formatter + +and fmt_precs ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs formatter = + let width' = width + 4L in + match precs with + | Precs {uident; precs_tl} -> + formatter |> Fmt.fmt "Precs " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "precs_tl=" |> fmt_precs_tl ~alt ~width:width' precs_tl + |> fmt_rcurly ~alt ~width +and pp_precs precs formatter = + fmt_precs precs formatter + +and fmt_prec_rels ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_rels formatter = + let width' = width + 4L in + match prec_rels with + | PrecRelsLtPrecs {lt; precs} -> + formatter |> Fmt.fmt "PrecRelsLtPrecs " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "lt=" |> Scan.Token.pp lt + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "precs=" |> fmt_precs ~alt ~width:width' precs + |> fmt_rcurly ~alt ~width + | PrecRelsEpsilon -> + formatter |> Fmt.fmt "PrecRelsEpsilon" +and pp_prec_rels prec_rels formatter = + fmt_prec_rels prec_rels formatter + +and fmt_prec_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_type formatter = + match prec_type with + | PrecTypePrec {prec} -> + formatter |> Fmt.fmt "PrecTypePrec " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prec=" |> Scan.Token.pp prec + |> fmt_rcurly ~alt ~width + | PrecTypeLeft {left} -> + formatter |> Fmt.fmt "PrecTypeLeft " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "left=" |> Scan.Token.pp left + |> fmt_rcurly ~alt ~width + | PrecTypeRight {right} -> + formatter |> Fmt.fmt "PrecTypeRight " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "right=" |> Scan.Token.pp right + |> fmt_rcurly ~alt ~width +and pp_prec_type prec_type formatter = + fmt_prec_type prec_type formatter + +and fmt_prec ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec formatter = + let width' = width + 4L in + match prec with + | Prec {prec_type; uident; prec_rels} -> + formatter |> Fmt.fmt "Prec " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prec_type=" |> fmt_prec_type ~alt ~width:width' prec_type + |> fmt_semi ~alt ~width + |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident + |> fmt_semi ~alt ~width + |> Fmt.fmt "prec_rels=" |> fmt_prec_rels ~alt ~width:width' prec_rels + |> fmt_rcurly ~alt ~width +and pp_prec prec formatter = + fmt_prec prec formatter + +and fmt_of_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) of_type formatter = + let width' = width + 4L in + match of_type with + | OfType {of_; type_module; dot; type_type} -> + formatter |> Fmt.fmt "OfType " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "of_=" |> Scan.Token.pp of_ + |> fmt_semi ~alt ~width + |> Fmt.fmt "type_module=" |> fmt_cident ~alt ~width:width' type_module + |> fmt_semi ~alt ~width + |> Fmt.fmt "dot=" |> Scan.Token.pp dot + |> fmt_semi ~alt ~width + |> Fmt.fmt "type_type=" |> fmt_uident ~alt ~width:width' type_type + |> fmt_rcurly ~alt ~width +and pp_of_type of_type formatter = + fmt_of_type of_type formatter + +and fmt_of_type0 ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) of_type0 formatter = + let width' = width + 4L in + match of_type0 with + | OfType0OfType {of_type} -> + formatter |> Fmt.fmt "OfType0OfType " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "of_type=" |> fmt_of_type ~alt ~width:width' of_type + |> fmt_rcurly ~alt ~width + | OfType0Epsilon -> + formatter |> Fmt.fmt "OfType0Epsilon" +and pp_of_type0 of_type0 formatter = + fmt_of_type0 of_type0 formatter + +and fmt_prec_ref ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_ref formatter = + let width' = width + 4L in + match prec_ref with + | PrecRefPrecUident {prec; uident} -> + formatter |> Fmt.fmt "PrecRefPrecUident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prec=" |> Scan.Token.pp prec + |> fmt_semi ~alt ~width + |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident + |> fmt_rcurly ~alt ~width + | PrecRefEpsilon -> + formatter |> Fmt.fmt "PrecRefEpsilon" +and pp_prec_ref prec_ref formatter = + fmt_prec_ref prec_ref formatter + +and fmt_token_alias ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) token_alias formatter = + match token_alias with + | TokenAlias {alias} -> + formatter |> Fmt.fmt "Token " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "alias=" |> Scan.Token.pp alias + |> fmt_rcurly ~alt ~width + | TokenAliasEpsilon -> + formatter |> Fmt.fmt "TokenAliasEpsilon" +and pp_token_alias token_alias formatter = + fmt_token_alias token_alias formatter + +and fmt_token ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) token formatter = + let width' = width + 4L in + match token with + | Token {token; cident; token_alias; of_type0; prec_ref} -> + formatter |> Fmt.fmt "Token " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "token=" |> Scan.Token.pp token + |> fmt_semi ~alt ~width + |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident + |> fmt_semi ~alt ~width + |> Fmt.fmt "token_alias=" |> fmt_token_alias ~alt ~width:width' token_alias + |> fmt_semi ~alt ~width + |> Fmt.fmt "of_type0=" |> fmt_of_type0 ~alt ~width:width' of_type0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref + |> fmt_rcurly ~alt ~width +and pp_token token formatter = + fmt_token token formatter + +and fmt_sep ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) sep formatter = + match sep with + | SepLineDelim {line_delim} -> + formatter |> Fmt.fmt "SepLineDelim " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "line_delim=" |> Scan.Token.pp line_delim + |> fmt_rcurly ~alt ~width + | SepSemi {semi} -> + formatter |> Fmt.fmt "SepSemi " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "semi=" |> Scan.Token.pp semi + |> fmt_rcurly ~alt ~width + | SepBar {bar} -> + formatter |> Fmt.fmt "SepBar " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "bar=" |> Scan.Token.pp bar + |> fmt_rcurly ~alt ~width +and pp_sep sep formatter = + fmt_sep sep formatter + +and fmt_codes_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes_tl formatter = + let width' = width + 4L in + match codes_tl with + | CodesTlSepCode {sep; code; codes_tl} -> + formatter |> Fmt.fmt "CodesTlSepCode " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "sep=" |> fmt_sep ~alt ~width:width' sep + |> fmt_semi ~alt ~width + |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code + |> fmt_rcurly ~alt ~width + |> Fmt.fmt "codes_tl=" |> fmt_codes_tl ~alt ~width:width' codes_tl + |> fmt_rcurly ~alt ~width + | CodesTlEpsilon -> formatter |> Fmt.fmt "CodesTlEpsilon" +and pp_codes_tl codes_tl formatter = + fmt_codes codes_tl formatter + +and fmt_codes ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes formatter = + let width' = width + 4L in + match codes with + | Codes {code; codes_tl} -> + formatter |> Fmt.fmt "Codes " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes_tl=" |> fmt_codes_tl ~alt ~width:width' codes_tl + |> fmt_rcurly ~alt ~width +and pp_codes codes formatter = + fmt_codes codes formatter + +and fmt_codes0 ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) codes0 formatter = + let width' = width + 4L in + match codes0 with + | Codes0Codes {codes} -> + formatter |> Fmt.fmt "Codes0Codes " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "codes=" |> fmt_codes ~alt ~width:width' codes + |> fmt_rcurly ~alt ~width + | Codes0Epsilon -> + formatter |> Fmt.fmt "Codes0Epsilon" +and pp_codes0 codes formatter = + fmt_codes codes formatter + +and fmt_delimited ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) delimited formatter = + let width' = width + 4L in + match delimited with + | DelimitedBlock {indent; codes; dedent} -> + formatter |> Fmt.fmt "DelimitedBlock " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "indent=" |> Scan.Token.pp indent + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes=" |> fmt_codes ~alt ~width:width' codes + |> fmt_semi ~alt ~width + |> Fmt.fmt "dedent=" |> Scan.Token.pp dedent + |> fmt_rcurly ~alt ~width + | DelimitedParen {lparen; codes0; rparen} -> + formatter |> Fmt.fmt "DelimitedParen " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "lparen=" |> Scan.Token.pp lparen + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "rparen=" |> Scan.Token.pp rparen + |> fmt_rcurly ~alt ~width + | DelimitedCapture {lcapture; codes0; rcapture} -> + formatter |> Fmt.fmt "DelimitedCapture " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "lcapture=" |> Scan.Token.pp lcapture + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "rcapture=" |> Scan.Token.pp rcapture + |> fmt_rcurly ~alt ~width + | DelimitedList {lbrack; codes0; rbrack} -> + formatter |> Fmt.fmt "DelimitedList " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "lbrack=" |> Scan.Token.pp lbrack + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "rbrack=" |> Scan.Token.pp rbrack + |> fmt_rcurly ~alt ~width + | DelimitedArray {larray; codes0; rarray} -> + formatter |> Fmt.fmt "DelimitedArray " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "larray=" |> Scan.Token.pp larray + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "rarray=" |> Scan.Token.pp rarray + |> fmt_rcurly ~alt ~width + | DelimitedModule {lcurly; codes0; rcurly} -> + formatter |> Fmt.fmt "DelimitedModule " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "lcurly=" |> Scan.Token.pp lcurly + |> fmt_semi ~alt ~width + |> Fmt.fmt "codes0=" |> fmt_codes0 ~alt ~width:width' codes0 + |> fmt_semi ~alt ~width + |> Fmt.fmt "rcurly=" |> Scan.Token.pp rcurly + |> fmt_rcurly ~alt ~width +and pp_delimited delimited formatter = + fmt_delimited delimited formatter + +and fmt_code_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code_tl formatter = + let width' = width + 4L in + match code_tl with + | CodeTlDelimited {delimited; code_tl} -> + formatter |> Fmt.fmt "CodeTlDelimited " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "delimited=" |> fmt_delimited ~alt ~width:width' delimited + |> fmt_semi ~alt ~width + |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl + |> fmt_rcurly ~alt ~width + | CodeTlToken {token; code_tl} -> + formatter |> Fmt.fmt "CodeTlToken " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "token=" |> Scan.Token.pp token + |> fmt_semi ~alt ~width + |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl + |> fmt_rcurly ~alt ~width + | CodeTlEpsilon -> + formatter |> Fmt.fmt "CodeTlEpsilon" +and pp_code_tl code_tl formatter = + fmt_code code_tl formatter + +and fmt_code ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code formatter = + let width' = width + 4L in + match code with + | CodeDelimited {delimited; code_tl} -> + formatter |> Fmt.fmt "CodeDelimited " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "delimited=" |> fmt_delimited ~alt ~width:width' delimited + |> fmt_semi ~alt ~width + |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl + |> fmt_rcurly ~alt ~width + | CodeToken {token; code_tl} -> + formatter |> Fmt.fmt "CodeToken " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "token=" |> Scan.Token.pp token + |> fmt_semi ~alt ~width + |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl + |> fmt_rcurly ~alt ~width +and pp_code code formatter = + fmt_code code formatter + +and fmt_prod_param_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param_type + formatter = + let width' = width + 4L in + match prod_param_type with + | ProdParamTypeCident {cident} -> + formatter |> Fmt.fmt "ProdParamTypeCident " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident + |> fmt_rcurly ~alt ~width + | ProdParamTypeAlias {alias} -> + formatter |> Fmt.fmt "ProdParamTypeAlias " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "alias=" |> Scan.Token.pp alias + |> fmt_rcurly ~alt ~width +and pp_prod_param_type prod_param_type formatter = + fmt_prod_param_type prod_param_type formatter + +and fmt_prod_param_ident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param_ident + formatter = + let width' = width + 4L in + match prod_param_ident with + | ProdParamIdentIdentColon {ident; colon} -> + formatter |> Fmt.fmt "ProdParamIdentIdentColon " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "ident=" |> fmt_ident ~alt ~width:width' ident + |> fmt_semi ~alt ~width + |> Fmt.fmt "colon=" |> Scan.Token.pp colon + |> fmt_rcurly ~alt ~width + | ProdParamIdentEpsilon -> + formatter |> Fmt.fmt "ProdParamIdentEpsilon" +and pp_prod_param_ident prod_param_ident formatter = + fmt_prod_param_ident prod_param_ident formatter + +and fmt_prod_param ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param formatter = + let width' = width + 4L in + match prod_param with + | ProdParam {prod_param_ident; prod_param_type} -> + formatter |> Fmt.fmt "ProdParam " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod_param_ident=" |> fmt_prod_param_ident ~alt ~width:width' prod_param_ident + |> fmt_semi ~alt ~width + |> Fmt.fmt "prod_param_type=" |> fmt_prod_param_type ~alt ~width:width' prod_param_type + |> fmt_rcurly ~alt ~width +and pp_prod_param prod_param formatter = + fmt_prod_param prod_param formatter + +and fmt_prod_params_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_params_tl formatter = + let width' = width + 4L in + match prod_params_tl with + | ProdParamsTlProdParam {prod_param; prod_params_tl} -> + formatter |> Fmt.fmt "ProdParamsTlProdParam " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod_param=" |> fmt_prod_param ~alt ~width:width' prod_param + |> fmt_semi ~alt ~width + |> Fmt.fmt "prod_params_tl=" |> fmt_prod_params_tl ~alt ~width:width' prod_params_tl + |> fmt_rcurly ~alt ~width + | ProdParamsTlEpsilon -> + formatter |> Fmt.fmt "ProdParamsTlEpsilon" +and pp_prod_params_tl prod_params_tl formatter = + fmt_prod_params_tl prod_params_tl formatter + +and fmt_prod_params ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_params formatter = + let width' = width + 4L in + match prod_params with + | ProdParamsProdParam {prod_param; prod_params_tl} -> + formatter |> Fmt.fmt "ProdParamsProdParam " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod_param=" |> fmt_prod_param ~alt ~width:width' prod_param + |> fmt_semi ~alt ~width + |> Fmt.fmt "prod_params_tl=" |> fmt_prod_params_tl ~alt ~width:width' prod_params_tl + |> fmt_rcurly ~alt ~width +and pp_prod_params prod_params formatter = + fmt_prod_params prod_params formatter + +and fmt_prod_pattern ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_pattern formatter = + let width' = width + 4L in + match prod_pattern with + | ProdPatternParams {prod_params} -> + formatter |> Fmt.fmt "ProdPatternParams " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod_params=" |> fmt_prod_params ~alt ~width:width' prod_params + |> fmt_rcurly ~alt ~width + | ProdPatternEpsilon {epsilon} -> + formatter |> Fmt.fmt "ProdPatternEpsilon " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "epsilon=" |> Scan.Token.pp epsilon + |> fmt_rcurly ~alt ~width +and pp_prod_pattern prod_pattern formatter = + fmt_prod_pattern prod_pattern formatter + +and fmt_prod ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod formatter = + let width' = width + 4L in + match prod with + | Prod {prod_pattern; prec_ref} -> + formatter |> Fmt.fmt "Prod " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod_pattern=" |> fmt_prod_pattern ~alt ~width:width' prod_pattern + |> fmt_semi ~alt ~width + |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref + |> fmt_rcurly ~alt ~width +and pp_prod prod formatter = + fmt_prod prod formatter + +and fmt_prods_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prods_tl formatter = + let width' = width + 4L in + match prods_tl with + | ProdsTlBarProd {bar; prod; prods_tl} -> + formatter |> Fmt.fmt "ProdsTlBarProd " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "bar=" |> Scan.Token.pp bar + |> fmt_semi ~alt ~width + |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod + |> fmt_semi ~alt ~width + |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl + |> fmt_rcurly ~alt ~width + | ProdsTlEpsilon -> + formatter |> Fmt.fmt "ProdsTlEpsilon" +and pp_prods_tl prods_tl formatter = + fmt_prods_tl prods_tl formatter + +and fmt_prods ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prods formatter = + let width' = width + 4L in + match prods with + | ProdsBarProd {bar; prod; prods_tl} -> + formatter |> Fmt.fmt "ProdsBarProd " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "bar=" |> Scan.Token.pp bar + |> fmt_semi ~alt ~width + |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod + |> fmt_semi ~alt ~width + |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl + |> fmt_rcurly ~alt ~width + | ProdsProd {prod; prods_tl} -> + formatter |> Fmt.fmt "ProdsProd " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prod=" |> fmt_prod ~alt ~width:width' prod + |> fmt_semi ~alt ~width + |> Fmt.fmt "prods_tl=" |> fmt_prods_tl ~alt ~width:width' prods_tl + |> fmt_rcurly ~alt ~width +and pp_prods prods formatter = + fmt_prods prods formatter + +and fmt_reduction ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reduction formatter = + let width' = width + 4L in + match reduction with + | Reduction {prods; arrow; code} -> + formatter |> Fmt.fmt "Reduction " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prods=" |> fmt_prods ~alt ~width:width' prods + |> fmt_semi ~alt ~width + |> Fmt.fmt "arrow=" |> Scan.Token.pp arrow + |> fmt_semi ~alt ~width + |> Fmt.fmt "code=" |> pp_code code + |> fmt_rcurly ~alt ~width +and pp_reduction reduction formatter = + fmt_reduction reduction formatter + +and fmt_reductions_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reductions_tl formatter = + let width' = width + 4L in + match reductions_tl with + | ReductionsTlBarReduction {bar; reduction; reductions_tl} -> + formatter |> Fmt.fmt "ReductionsTlBarReduction " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "bar=" |> Scan.Token.pp bar + |> fmt_semi ~alt ~width + |> Fmt.fmt "reduction=" |> fmt_reduction ~alt ~width:width' reduction + |> fmt_semi ~alt ~width + |> Fmt.fmt "reductions_tl=" |> fmt_reductions_tl ~alt ~width:width' reductions_tl + |> fmt_rcurly ~alt ~width + | ReductionsTlEpsilon -> + formatter |> Fmt.fmt "ReductionsTlEpsilon" +and pp_reductions_tl reductions_tl formatter = + fmt_reductions_tl reductions_tl formatter + +and fmt_reductions ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) reductions formatter = + let width' = width + 4L in + match reductions with + | ReductionsBarReduction {bar; reduction; reductions_tl} -> + formatter |> Fmt.fmt "ReductionsBarReduction " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "bar=" |> Scan.Token.pp bar + |> fmt_semi ~alt ~width + |> Fmt.fmt "reduction=" |> fmt_reduction ~alt ~width:width' reduction + |> fmt_semi ~alt ~width + |> Fmt.fmt "reductions_tl=" |> fmt_reductions_tl ~alt ~width:width' reductions_tl + |> fmt_rcurly ~alt ~width + | ReductionsReduction {reduction; reductions_tl} -> + formatter |> Fmt.fmt "ReductionsReduction " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "reduction=" |> fmt_reduction ~alt ~width:width' reduction + |> fmt_semi ~alt ~width + |> Fmt.fmt "reductions_tl=" |> fmt_reductions_tl ~alt ~width:width' reductions_tl + |> fmt_rcurly ~alt ~width +and pp_reductions reductions formatter = + fmt_reductions reductions formatter + +and fmt_nonterm_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) nonterm_type formatter = + match nonterm_type with + | NontermTypeNonterm {nonterm} -> + formatter |> Fmt.fmt "NontermTypeNonterm " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "nonterm=" |> Scan.Token.pp nonterm + |> fmt_rcurly ~alt ~width + | NontermTypeStart {start} -> + formatter |> Fmt.fmt "NontermTypeStart " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "start=" |> Scan.Token.pp start + |> fmt_rcurly ~alt ~width +and pp_nonterm_type nonterm_type formatter = + fmt_nonterm_type nonterm_type formatter + +and fmt_nonterm ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) nonterm formatter = + let width' = width + 4L in + match nonterm with + | NontermReductions {nonterm_type; cident; of_type; prec_ref; cce; reductions} -> + formatter |> Fmt.fmt "NontermReductions " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "nonterm_type=" |> fmt_nonterm_type ~alt ~width:width' nonterm_type + |> fmt_semi ~alt ~width + |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident + |> fmt_semi ~alt ~width + |> Fmt.fmt "of_type=" |> fmt_of_type ~alt ~width:width' of_type + |> fmt_semi ~alt ~width + |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref + |> fmt_semi ~alt ~width + |> Fmt.fmt "cce=" |> Scan.Token.pp cce + |> fmt_semi ~alt ~width + |> Fmt.fmt "reductions=" |> fmt_reductions ~alt ~width:width' reductions + |> fmt_rcurly ~alt ~width + | NontermProds {nonterm_type; cident; prec_ref; cce; prods} -> + formatter |> Fmt.fmt "NontermProds " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "nonterm_type=" |> fmt_nonterm_type ~alt ~width:width' nonterm_type + |> fmt_semi ~alt ~width + |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident + |> fmt_semi ~alt ~width + |> Fmt.fmt "prec_ref=" |> fmt_prec_ref ~alt ~width:width' prec_ref + |> fmt_semi ~alt ~width + |> Fmt.fmt "cce=" |> Scan.Token.pp cce + |> fmt_semi ~alt ~width + |> Fmt.fmt "prods=" |> fmt_prods ~alt ~width:width' prods + |> fmt_rcurly ~alt ~width +and pp_nonterm nonterm formatter = + fmt_nonterm nonterm formatter + +and fmt_stmt ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmt formatter = + let width' = width + 4L in + match stmt with + | StmtPrec {prec} -> + formatter |> Fmt.fmt "StmtPrec " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prec=" |> fmt_prec ~alt ~width:width' prec + |> fmt_rcurly ~alt ~width + | StmtToken {token} -> + formatter |> Fmt.fmt "StmtToken " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "token=" |> fmt_token ~alt ~width:width' token + |> fmt_rcurly ~alt ~width + | StmtNonterm {nonterm} -> + formatter |> Fmt.fmt "StmtNonterm " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "nonterm=" |> fmt_nonterm ~alt ~width:width' nonterm + |> fmt_rcurly ~alt ~width + | StmtCode {code} -> + formatter |> Fmt.fmt "StmtCode " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "code=" |> fmt_code ~alt ~width:width' code + |> fmt_rcurly ~alt ~width +and pp_stmt stmt formatter = + fmt_stmt stmt formatter + +and fmt_stmts_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmts_tl formatter = + let width' = width + 4L in + match stmts_tl with + | StmtsTl {line_delim; stmt; stmts_tl} -> + formatter |> Fmt.fmt "StmtsTl " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "line_delim=" |> Scan.Token.pp line_delim + |> fmt_semi ~alt ~width + |> Fmt.fmt "stmt=" |> fmt_stmt ~alt ~width:width' stmt + |> fmt_semi ~alt ~width + |> Fmt.fmt "stmts_tl=" |> fmt_stmts_tl ~alt ~width:width' stmts_tl + |> fmt_rcurly ~alt ~width + | StmtsTlEpsilon -> + formatter |> Fmt.fmt "StmtsTlEpsilon" +and pp_stmts_tl stmts_tl formatter = + fmt_stmts_tl stmts_tl formatter + +and fmt_stmts ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmts formatter = + let width' = width + 4L in + match stmts with + | Stmts {stmt; stmts_tl} -> + formatter |> Fmt.fmt "Stmts " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "stmt=" |> fmt_stmt ~alt ~width:width' stmt + |> fmt_semi ~alt ~width + |> Fmt.fmt "stmts_tl=" |> fmt_stmts_tl ~alt ~width:width' stmts_tl + |> fmt_rcurly ~alt ~width +and pp_stmts stmts formatter = + fmt_stmts stmts formatter + +and fmt_hocc ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hocc formatter = + let width' = width + 4L in + match hocc with + | Hocc {hocc; indent; stmts; dedent} -> + formatter |> Fmt.fmt "Hocc " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc + |> fmt_semi ~alt ~width + |> Fmt.fmt "indent=" |> Scan.Token.pp indent + |> fmt_semi ~alt ~width + |> Fmt.fmt "stmts=" |> fmt_stmts ~alt ~width:width' stmts + |> fmt_semi ~alt ~width + |> Fmt.fmt "dedent=" |> Scan.Token.pp dedent + |> fmt_rcurly ~alt ~width +and pp_hocc hocc formatter = + fmt_hocc hocc formatter + +and fmt_eoi ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) eoi formatter = + match eoi with + | Eoi {eoi} -> + formatter |> Fmt.fmt "Eoi " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "eoi=" |> Scan.Token.pp eoi + |> fmt_rcurly ~alt ~width +and pp_eoi eoi formatter = + fmt_eoi eoi formatter + +and fmt_matter ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) matter formatter = + let width' = width + 4L in + match matter with + | Matter {token; matter} -> + formatter |> Fmt.fmt "Matter " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "token=" |> Scan.Token.pp token + |> fmt_semi ~alt ~width + |> Fmt.fmt "matter=" |> fmt_matter ~alt ~width:width' matter + |> fmt_rcurly ~alt ~width + | MatterEpsilon -> + formatter |> Fmt.fmt "MatterEpsilon" +and pp_matter matter formatter = + fmt_matter matter formatter + +and fmt_hmh ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmh formatter = + let width' = width + 4L in + match hmh with + | Hmh {prelude; hocc; postlude; eoi} -> + formatter |> Fmt.fmt "Hmh " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prelude=" |> pp_matter prelude + |> fmt_semi ~alt ~width + |> Fmt.fmt "hocc=" |> fmt_hocc ~alt ~width:width' hocc + |> fmt_semi ~alt ~width + |> Fmt.fmt "postlude=" |> pp_matter postlude + |> fmt_semi ~alt ~width + |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi + |> fmt_rcurly ~alt ~width +and pp_hmh hmh formatter = + fmt_hmh hmh formatter + +and fmt_hmhi ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmhi formatter = + let width' = width + 4L in + match hmhi with + | Hmhi {prelude; hocc; postlude; eoi} -> + formatter |> Fmt.fmt "Hmhi " + |> fmt_lcurly ~alt ~width + |> Fmt.fmt "prelude=" |> pp_matter prelude + |> fmt_semi ~alt ~width + |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc + |> fmt_semi ~alt ~width + |> Fmt.fmt "postlude=" |> pp_matter postlude + |> fmt_semi ~alt ~width + |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi + |> fmt_rcurly ~alt ~width +and pp_hmhi hmhi formatter = + fmt_hmhi hmhi formatter + +(**************************************************************************************************) +(* Recursive descent parser. *) + +let trace = false + +type ctx = { + scanner: Scan.t; + errs: Error.t list; +} + +let pp_ctx {scanner; errs} formatter = + formatter + |> Fmt.fmt "{scanner=" |> Scan.pp scanner + |> Fmt.fmt "; errs=" |> (List.pp Error.pp) errs + |> Fmt.fmt "}" + +let rec next ?(all=false) spine ({scanner; errs} as ctx) = + let scanner', tok = Scan.next scanner in + let _ = if trace then + File.Fmt.stderr + |> Fmt.fmt "hocc (trace): next ~all:" |> Bool.pp all + |> Fmt.fmt " " |> (List.pp String.pp) (List.rev spine) + |> Fmt.fmt " " |> pp_ctx ctx |> ignore + in + let errs' = List.fold (Scan.Token.malformations tok) ~init:errs ~f:(fun accum mal -> + Error.init_mal mal :: accum) in + let ctx' = {scanner=scanner'; errs=errs'} in + match all, tok with + | _, HmcToken {atok=Tok_whitespace; _} + | false, HmcToken {atok=(Tok_hash_comment|Tok_paren_comment _); _} -> begin + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt " -> recurse (" |> Scan.Token.pp tok |> Fmt.fmt ")\n" |> ignore + in + next ~all spine ctx' + end + | _ -> begin + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt " -> " |> Scan.Token.pp tok |> Fmt.fmt "\n" |> ignore in + ctx', tok + end + +let err msg {scanner; errs} = + {scanner; errs=(Error.init_scanner scanner msg) :: errs} + +let err_token tok msg {scanner; errs} = + {scanner; errs=(Error.init_token tok msg) :: errs} + +let reduce ?(alt=true) spine ctx + (fmt_t: ?alt:bool -> ?width:uns -> 'a -> (module Fmt.Formatter) -> (module Fmt.Formatter)) t = + let _ = if trace then + File.Fmt.stderr |> Fmt.fmt "hocc (trace): reduce " |> (List.pp String.pp) (List.rev spine) + |> Fmt.fmt " " |> pp_ctx ctx |> Fmt.fmt " " |> fmt_t ~alt t |> Fmt.fmt "\n" |> ignore + in + ctx, Some t + +(* Map optional subtree result, passing the resulting ctx in to enable tail recursion. *) +let mapr ~child ~f spine ctx = + let ctx', child_opt = child spine ctx in + match child_opt with + | None -> ctx', None + | Some c -> f spine ctx' c + +(* Map optional subtree result, without support for tail recursion. *) +let map ~child ~f + ~(fmt_child: ?alt:bool -> ?width:uns -> 'a -> (module Fmt.Formatter) -> (module Fmt.Formatter)) + spine ctx = + mapr ~child ~f:(fun spine ctx' c -> reduce spine ctx' fmt_child (f c)) spine ctx + +let rec uident spine ctx = + let spine = match trace with true -> "uident" :: spine | false -> [] in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uident _; _} as uident -> + reduce spine ctx' fmt_uident (Uident {uident}) + | _ -> err_token tok "Expected uident" ctx, None + +and cident spine ctx = + let spine = "cident" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_cident (Cident {cident}) + | _ -> err_token tok "Expected cident" ctx, None + +and ident spine ctx = + let spine = "ident" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uident _; _} as uident -> + reduce spine ctx' fmt_ident (IdentUident {uident=Uident {uident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_ident (IdentCident {cident=Cident {cident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_uscore; _} as uscore -> + reduce spine ctx' fmt_ident (IdentUscore {uscore}) + | _ -> err_token tok "Expected ident" ctx, None + +and precs_tl spine ctx = + let spine = "precs_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_comma; _} as comma -> + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:precs_tl ~f:(fun precs_tl -> + PrecsTlCommaUident {comma; uident; precs_tl} + ) ~fmt_child:fmt_precs_tl spine ctx' + ) spine ctx' + | _ -> reduce spine ctx fmt_precs_tl PrecsTlEpsilon + +and precs spine ctx = + let spine = "precs" :: spine in + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:precs_tl ~f:(fun precs_tl -> + Precs {uident; precs_tl} + ) ~fmt_child:fmt_precs spine ctx' + ) spine ctx + +and prec_rels spine ctx = + let spine = "prec_rels" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lt; _} as lt -> + map ~child:precs ~f:(fun precs -> + PrecRelsLtPrecs {lt; precs} + ) ~fmt_child:fmt_prec_rels spine ctx' + | _ -> reduce spine ctx fmt_prec_rels PrecRelsEpsilon + +and prec_type spine ctx = + let spine = "prec_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_prec; _} as prec -> + reduce spine ctx' fmt_prec_type (PrecTypePrec {prec}) + | HoccToken {atok=Scan.AbstractToken.Tok_left; _} as left -> + reduce spine ctx' fmt_prec_type (PrecTypeLeft {left}) + | HoccToken {atok=Scan.AbstractToken.Tok_right; _} as right -> + reduce spine ctx' fmt_prec_type (PrecTypeRight {right}) + | _ -> err_token tok "Expected precedence type" ctx, None + +and prec spine ctx = + let spine = "prec" :: spine in + mapr ~child:prec_type ~f:(fun spine ctx' prec_type -> + mapr ~child:uident ~f:(fun spine ctx' uident -> + map ~child:prec_rels ~f:(fun prec_rels -> + Prec {prec_type; uident; prec_rels} + ) ~fmt_child:fmt_prec spine ctx' + ) spine ctx' + ) spine ctx + +and of_type spine ctx = + let spine = "of_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} as of_ -> + let dot spine ctx = begin + let spine = "dot" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_dot; _} -> ctx', Some tok + | _ -> err_token tok "Expected '.'" ctx, None + end in + mapr ~child:cident ~f:(fun spine ctx' type_module -> + mapr ~child:dot ~f:(fun spine ctx' dot -> + map ~child:uident ~f:(fun type_type -> + OfType {of_; type_module; dot; type_type} + ) ~fmt_child:fmt_of_type spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'of'" ctx, None + +and of_type0 spine ctx = + let spine = "of_type0" :: spine in + let _ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} -> + map ~child:of_type ~f:(fun of_type -> + OfType0OfType {of_type} + ) ~fmt_child:fmt_of_type0 spine ctx + | _ -> reduce spine ctx fmt_of_type0 OfType0Epsilon + +and prec_ref spine ctx = + let spine = "prec_ref" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_prec; _} as prec -> + map ~child:uident ~f:(fun uident -> + PrecRefPrecUident {prec; uident} + ) ~fmt_child:fmt_prec_ref spine ctx' + | _ -> reduce spine ctx fmt_prec_ref PrecRefEpsilon + +and token_alias spine ctx = + let spine = "token_alias" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_istring _; _} as alias -> + reduce spine ctx' fmt_token_alias (TokenAlias {alias}) + | _ -> reduce spine ctx fmt_token_alias TokenAliasEpsilon + +and token spine ctx = + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_token; _} as token -> + mapr ~child:cident ~f:(fun spine ctx' cident -> + mapr ~child:token_alias ~f:(fun spine ctx' token_alias -> + mapr ~child:of_type0 ~f:(fun spine ctx' of_type0 -> + map ~child:prec_ref ~f:(fun prec_ref -> + Token {token; cident; token_alias; of_type0; prec_ref} + ) ~fmt_child:fmt_token spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'token' statement" ctx, None + +and sep spine ctx = + let spine = "sep" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} as line_delim -> + reduce spine ctx' fmt_sep (SepLineDelim {line_delim}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_semi; _} as semi -> + reduce spine ctx' fmt_sep (SepSemi {semi}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + reduce spine ctx' fmt_sep (SepBar {bar}) + | _ -> ctx, None + +and codes_tl spine ctx = + let spine = "codes_tl" :: spine in + let _ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_semi; _} + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} -> + mapr ~child:sep ~f:(fun spine ctx' sep -> + mapr ~child:code ~f:(fun spine ctx' code -> + map ~child:codes_tl ~f:(fun codes_tl -> + CodesTlSepCode {sep; code; codes_tl} + ) ~fmt_child:fmt_codes_tl spine ctx' + ) spine ctx' + ) spine ctx + | _ -> reduce spine ctx fmt_codes_tl CodesTlEpsilon + +and codes spine ctx = + let spine = "codes" :: spine in + mapr ~child:code ~f:(fun spine ctx' code -> + map ~child:codes_tl ~f:(fun codes_tl -> + Codes {code; codes_tl} + ) ~fmt_child:fmt_codes spine ctx' + ) spine ctx + +and codes0 spine ctx = + let spine = "codes0" :: spine in + let ctx', codes_opt = codes spine ctx in + match codes_opt with + | Some codes -> reduce spine ctx' fmt_codes0 (Codes0Codes {codes}) + | None -> reduce spine ctx fmt_codes0 Codes0Epsilon + +and indent spine ctx = + let spine = "indent" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_indent _; _} -> ctx', Some tok + | _ -> err_token tok "Expected indent" ctx, None + +and dedent ?all spine ctx = + let spine = "dedent" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_dedent _; _} -> ctx', Some tok + | _ -> err_token tok "Expected dedent" ctx, None + +and rparen ?all spine ctx = + let spine = "rparen" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rparen; _} -> ctx', Some tok + | _ -> err_token tok "Expected ')'" ctx, None + +and rcapture ?all spine ctx = + let spine = "rcapture" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rcapture; _} -> ctx', Some tok + | _ -> err_token tok "Expected '|)'" ctx, None + +and rbrack ?all spine ctx = + let spine = "rbrack" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rbrack; _} -> ctx', Some tok + | _ -> err_token tok "Expected ']'" ctx, None + +and rarray ?all spine ctx = + let spine = "rarray" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rarray; _} -> ctx', Some tok + | _ -> err_token tok "Expected '|]'" ctx, None + +and rcurly ?all spine ctx = + let spine = "rcurly" :: spine in + let ctx', tok = next ?all spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_rcurly; _} -> ctx', Some tok + | _ -> err_token tok "Expected '}'" ctx, None + +and delimited spine ctx = + let spine = "delimited" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_indent _; _} as indent -> + mapr ~child:codes ~f:(fun spine ctx' codes -> + map ~child:(dedent ~all:true) ~f:(fun dedent -> + DelimitedBlock {indent; codes; dedent} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lparen; _} as lparen -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rparen ~all:true) ~f:(fun rparen -> + DelimitedParen {lparen; codes0; rparen} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lcapture; _} as lcapture -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rcapture ~all:true) ~f:(fun rcapture -> + DelimitedCapture {lcapture; codes0; rcapture} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lbrack; _} as lbrack -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rbrack ~all:true) ~f:(fun rbrack -> + DelimitedList {lbrack; codes0; rbrack} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_larray; _} as larray -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rarray ~all:true) ~f:(fun rarray -> + DelimitedArray {larray; codes0; rarray} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_lcurly; _} as lcurly -> + mapr ~child:codes0 ~f:(fun spine ctx' codes0 -> + map ~child:(rcurly ~all:true) ~f:(fun rcurly -> + DelimitedModule {lcurly; codes0; rcurly} + ) ~fmt_child:fmt_delimited spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected left delimiter" ctx, None + +and code_tl spine ctx = + let spine = "code_tl" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_indent _|Tok_lparen|Tok_lcapture|Tok_lbrack + |Tok_larray|Tok_lcurly); _} -> + mapr ~child:delimited ~f:(fun spine ctx' delimited -> + map ~child:code_tl ~f:(fun code_tl -> + CodeTlDelimited {delimited; code_tl} + ) ~fmt_child:fmt_code_tl spine ctx' + ) spine ctx + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_dedent _|Tok_rparen|Tok_rcapture|Tok_rbrack + |Tok_rarray|Tok_rcurly + |Tok_line_delim|Tok_semi|Tok_bar); _} -> + reduce spine ctx fmt_code_tl CodeTlEpsilon + | HmcToken _ as token -> + map ~child:code_tl ~f:(fun code_tl -> + CodeTlToken {token; code_tl} + ) ~fmt_child:fmt_code_tl spine ctx' + | _ -> reduce spine ctx fmt_code_tl CodeTlEpsilon + +and code spine ctx = + let spine = "code" :: spine in + let ctx', tok = next ~all:true spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_indent _|Tok_lparen|Tok_lcapture|Tok_lbrack + |Tok_larray|Tok_lcurly); _} -> + mapr ~child:delimited ~f:(fun spine ctx' delimited -> + map ~child:code_tl ~f:(fun code_tl -> + CodeDelimited {delimited; code_tl} + ) ~fmt_child:fmt_code spine ctx' + ) spine ctx + | HmcToken {atok=Hmc.Scan.AbstractToken.(Tok_dedent _|Tok_rparen|Tok_rcapture|Tok_rbrack + |Tok_rarray|Tok_rcurly + |Tok_line_delim|Tok_semi|Tok_bar); _} -> + err_token tok "Expected Hemlock code" ctx, None + | HmcToken _ as token -> + map ~child:code_tl ~f:(fun code_tl -> + CodeToken {token; code_tl} + ) ~fmt_child:fmt_code spine ctx' + | _ -> err_token tok "Expected Hemlock code" ctx, None + +and prod_param_type spine ctx = + let spine = "prod_param_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_cident _; _} as cident -> + reduce spine ctx' fmt_prod_param_type (ProdParamTypeCident {cident=Cident {cident}}) + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_istring _; _} as alias -> + reduce spine ctx' fmt_prod_param_type (ProdParamTypeAlias {alias}) + | _ -> err_token tok "Expected production parameter type" ctx, None + +and prod_param_ident spine ctx = + let spine = "prod_param_ident" :: spine in + let colon spine ctx = begin + let spine = "colon" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_colon; _} -> ctx', Some tok + | _ -> err_token tok "Expected ':'" ctx, None + end in + let ctx', ident_colon_opt = mapr ~child:ident ~f:(fun spine ctx' ident -> + map ~child:colon ~f:(fun colon -> + ProdParamIdentIdentColon {ident; colon} + ) ~fmt_child:fmt_prod_param_ident spine ctx' + ) spine ctx + in + match ident_colon_opt with + | Some _ -> ctx', ident_colon_opt + | None -> reduce spine ctx fmt_prod_param_ident ProdParamIdentEpsilon + +and prod_param spine ctx = + let spine = "prod_param" :: spine in + mapr ~child:prod_param_ident ~f:(fun spine ctx' prod_param_ident -> + map ~child:prod_param_type ~f:(fun prod_param_type -> + ProdParam {prod_param_ident; prod_param_type} + ) ~fmt_child:fmt_prod_param spine ctx' + ) spine ctx + +and prod_params_tl spine ctx = + let spine = "prod_params_tl" :: spine in + let ctx', prod_param_opt = prod_param spine ctx in + match prod_param_opt with + | Some prod_param -> + map ~child:prod_params_tl ~f:(fun prod_params_tl -> + ProdParamsTlProdParam {prod_param; prod_params_tl} + ) ~fmt_child:fmt_prod_params_tl spine ctx' + | None -> reduce spine ctx fmt_prod_params_tl ProdParamsTlEpsilon + +and prod_params spine ctx = + let spine = "prod_params" :: spine in + mapr ~child:prod_param ~f:(fun spine ctx' prod_param -> + map ~child:prod_params_tl ~f:(fun prod_params_tl -> + ProdParamsProdParam {prod_param; prod_params_tl} + ) ~fmt_child:fmt_prod_params spine ctx' + ) spine ctx + +and prod_pattern spine ctx = + let spine = "prod_pattern" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_epsilon; _} as epsilon -> + reduce spine ctx' fmt_prod_pattern (ProdPatternEpsilon {epsilon}) + | _ -> + map ~child:prod_params ~f:(fun prod_params -> + ProdPatternParams {prod_params} + ) ~fmt_child:fmt_prod_pattern spine ctx + +and prod spine ctx = + let spine = "prod" :: spine in + mapr ~child:prod_pattern ~f:(fun spine ctx' prod_pattern -> + map ~child:prec_ref ~f:(fun prec_ref -> + Prod {prod_pattern; prec_ref} + ) ~fmt_child:fmt_prod spine ctx' + ) spine ctx + +and prods_tl spine ctx = + let spine = "prods_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsTlBarProd {bar; prod; prods_tl} + ) ~fmt_child:fmt_prods_tl spine ctx' + ) spine ctx' + | _ -> reduce spine ctx fmt_prods_tl ProdsTlEpsilon + +and prods spine ctx = + let spine = "prods" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsBarProd {bar; prod; prods_tl} + ) ~fmt_child:fmt_prods spine ctx' + ) spine ctx' + | _ -> + mapr ~child:prod ~f:(fun spine ctx' prod -> + map ~child:prods_tl ~f:(fun prods_tl -> + ProdsProd {prod; prods_tl} + ) ~fmt_child:fmt_prods spine ctx' + ) spine ctx + +and reduction spine ctx = + let spine = "reduction" :: spine in + let arrow spine ctx = begin + let spine = "arrow" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_arrow; _} -> ctx', Some tok + | _ -> err_token tok "Expected '->'" ctx, None + end in + mapr ~child:prods ~f:(fun spine ctx' prods -> + mapr ~child:arrow ~f:(fun spine ctx' arrow -> + map ~child:code ~f:(fun code -> + Reduction {prods; arrow; code} + ) ~fmt_child:fmt_reduction spine ctx' + ) spine ctx' + ) spine ctx + +and reductions_tl spine ctx = + let spine = "reductions_tl" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:reduction ~f:(fun spine ctx' reduction -> + map ~child:reductions_tl ~f:(fun reductions_tl -> + ReductionsTlBarReduction {bar; reduction; reductions_tl} + ) ~fmt_child:fmt_reductions_tl spine ctx' + ) spine ctx' + | _ -> reduce spine ctx fmt_reductions_tl ReductionsTlEpsilon + +and reductions spine ctx = + let spine = "reductions" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_bar; _} as bar -> + mapr ~child:reduction ~f:(fun spine ctx' reduction -> + map ~child:reductions_tl ~f:(fun reductions_tl -> + ReductionsBarReduction {bar; reduction; reductions_tl} + ) ~fmt_child:fmt_reductions spine ctx' + ) spine ctx' + | _ -> + mapr ~child:reduction ~f:(fun spine ctx' reduction -> + map ~child:reductions_tl ~f:(fun reductions_tl -> + ReductionsReduction {reduction; reductions_tl} + ) ~fmt_child:fmt_reductions spine ctx' + ) spine ctx + +and nonterm_type spine ctx = + let spine = "nonterm_type" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_nonterm; _} as nonterm -> + reduce spine ctx' fmt_nonterm_type (NontermTypeNonterm {nonterm}) + | HoccToken {atok=Scan.AbstractToken.Tok_start; _} as start -> + reduce spine ctx' fmt_nonterm_type (NontermTypeStart {start}) + | _ -> err_token tok "Expected 'nonterm'/'start'" ctx, None + +and nonterm spine ctx = + let spine = "nonterm" :: spine in + let cce spine ctx = begin + let spine = "cce" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_colon_op "::="; _} -> ctx', Some tok + | _ -> err_token tok "Expected '::='" ctx, None + end in + mapr ~child:nonterm_type ~f:(fun spine ctx' nonterm_type -> + mapr ~child:cident ~f:(fun spine ctx' cident -> + let _ctx'', tok = next spine ctx' in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_of; _} -> + mapr ~child:of_type ~f:(fun spine ctx' of_type -> + mapr ~child:prec_ref ~f:(fun spine ctx' prec_ref -> + mapr ~child:cce ~f:(fun spine ctx' cce -> + map ~child:reductions ~f:(fun reductions -> + NontermReductions {nonterm_type; cident; of_type; prec_ref; cce; reductions} + ) ~fmt_child:fmt_nonterm spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> + mapr ~child:prec_ref ~f:(fun spine ctx' prec_ref -> + mapr ~child:cce ~f:(fun spine ctx' cce -> + map ~child:prods ~f:(fun prods -> + NontermProds {nonterm_type; cident; prec_ref; cce; prods} + ) ~fmt_child:fmt_nonterm spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + +and stmt spine ctx = + let spine = "stmt" :: spine in + let _ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.(Tok_prec|Tok_left|Tok_right); _} -> + map ~child:prec ~f:(fun prec -> StmtPrec {prec}) ~fmt_child:fmt_stmt spine ctx + | HoccToken {atok=Scan.AbstractToken.Tok_token; _} -> + map ~child:token ~f:(fun token -> StmtToken {token}) ~fmt_child:fmt_stmt spine ctx + | HoccToken {atok=Scan.AbstractToken.(Tok_nonterm|Tok_start); _} -> + map ~child:nonterm ~f:(fun nonterm -> StmtNonterm {nonterm}) ~fmt_child:fmt_stmt spine ctx + | _ -> map ~child:code ~f:(fun code -> StmtCode {code}) ~fmt_child:fmt_stmt spine ctx + +and stmts_tl spine ctx = + let spine = "stmts_tl" :: spine in + let line_delim spine ctx = begin + let spine = "line_delim" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_line_delim; _} -> ctx', Some tok + | _ -> err_token tok "Expected line delimiter" ctx, None + end in + let ctx', line_delim_opt = line_delim spine ctx in + match line_delim_opt with + | Some line_delim -> begin + mapr ~child:stmt ~f:(fun spine ctx' stmt -> + map ~child:stmts_tl ~f:(fun stmts_tl -> + StmtsTl {line_delim; stmt; stmts_tl} + ) ~fmt_child:fmt_stmts_tl spine ctx' + ) spine ctx' + end + | None -> reduce spine ctx fmt_stmts_tl StmtsTlEpsilon + +and stmts spine ctx = + let spine = "stmts" :: spine in + mapr ~child:stmt ~f:(fun spine ctx' stmt -> + map ~child:stmts_tl ~f:(fun stmts_tl -> + Stmts {stmt; stmts_tl} + ) ~fmt_child:fmt_stmts spine ctx' + ) spine ctx + +and hocc spine ctx = + let spine = "hocc" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc -> + mapr ~child:indent ~f:(fun spine ctx' indent -> + mapr ~child:stmts ~f:(fun spine ctx' stmts -> + map ~child:dedent ~f:(fun dedent -> + Hocc {hocc; indent; stmts; dedent} + ) ~fmt_child:fmt_hocc spine ctx' + ) spine ctx' + ) spine ctx' + | _ -> err_token tok "Expected 'hocc' statement" ctx, None + +and eoi spine ctx = + let spine = "eoi" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_end_of_input; _} as eoi -> + reduce spine ctx' fmt_eoi (Eoi {eoi}) + | _ -> err "Unexpected token before eoi" ctx, None + +and matter spine ctx = + let spine = "matter" :: spine in + let rec f ctx = begin + let ctx', tok = next ~all:true spine ctx in + match tok with + | HoccToken _ + | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_end_of_input; _} -> ctx, MatterEpsilon + | HmcToken _ -> begin + let ctx', matter_rchild = f ctx' in + ctx', Matter {token=tok; matter=matter_rchild} + end + end in + let ctx', matter = f ctx in + reduce ~alt:false spine ctx' fmt_matter matter + +and hmh scanner = + let spine = ["hmh"] in + let ctx = {scanner; errs=[]} in + let ctx', hmh_opt = + mapr ~child:matter ~f:(fun spine ctx' prelude -> + mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:matter ~f:(fun spine ctx' postlude -> + map ~child:eoi ~f:(fun eoi -> + Hmh {prelude; hocc; postlude; eoi} + ) ~fmt_child:fmt_hmh spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + in + match hmh_opt with + | None -> ctx'.scanner, Error ctx'.errs + | Some hmh -> ctx'.scanner, Ok hmh + +and hmhi scanner = + let spine = ["hmhi"] in + let hocc spine ctx = begin + let spine = "hocc" :: spine in + let ctx', tok = next spine ctx in + match tok with + | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc -> ctx', Some hocc + | _ -> err "Expected 'hocc' keyword" ctx, None + end in + let ctx = {scanner; errs=[]} in + let ctx', hmh_opt = + mapr ~child:matter ~f:(fun spine ctx' prelude -> + mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:matter ~f:(fun spine ctx' postlude -> + map ~child:eoi ~f:(fun eoi -> + Hmhi {prelude; hocc; postlude; eoi} + ) ~fmt_child:fmt_hmhi spine ctx' + ) spine ctx' + ) spine ctx' + ) spine ctx + in + match hmh_opt with + | None -> ctx'.scanner, Error ctx'.errs + | Some hmh -> ctx'.scanner, Ok hmh diff --git a/bootstrap/bin/hocc/prec.ml b/bootstrap/bin/hocc/prec.ml new file mode 100644 index 000000000..7809f58a1 --- /dev/null +++ b/bootstrap/bin/hocc/prec.ml @@ -0,0 +1,22 @@ +open Basis +open Basis.Rudiments + +type t = { + index: uns; + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; + stmt: Parse.prec; +} + +let pp {index; name; assoc; doms; stmt} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; assoc=" |> (Option.pp Assoc.pp) assoc + |> Fmt.fmt "; doms=" |> (List.pp Uns.pp) (Ordset.to_list doms) + |> Fmt.fmt "; stmt=" |> Parse.fmt_prec stmt + |> Fmt.fmt "}" + +let init ~index ~name ~assoc ~doms ~stmt = + {index; name; assoc; doms; stmt} diff --git a/bootstrap/bin/hocc/prec.mli b/bootstrap/bin/hocc/prec.mli new file mode 100644 index 000000000..d1f8c2b47 --- /dev/null +++ b/bootstrap/bin/hocc/prec.mli @@ -0,0 +1,15 @@ +open Basis +open Basis.Rudiments + +type t = { + index: uns; + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; + stmt: Parse.prec; +} + +include FormattableIntf.SMono with type t := t + +val init: index:uns -> name:string -> assoc:(Assoc.t option) + -> doms:(uns, Uns.cmper_witness) Ordset.t -> stmt:Parse.prec -> t diff --git a/bootstrap/bin/hocc/scan.ml b/bootstrap/bin/hocc/scan.ml new file mode 100644 index 000000000..63a528bc5 --- /dev/null +++ b/bootstrap/bin/hocc/scan.ml @@ -0,0 +1,113 @@ +open Basis +open! Basis.Rudiments + +module AbstractToken = struct + type t = + | Tok_hocc + | Tok_token + | Tok_nonterm + | Tok_start + | Tok_epsilon + | Tok_prec + | Tok_left + | Tok_right + + let pp t formatter = + formatter |> Fmt.fmt (match t with + | Tok_hocc -> "Tok_hocc" + | Tok_token -> "Tok_token" + | Tok_nonterm -> "Tok_nonterm" + | Tok_start -> "Tok_start" + | Tok_epsilon -> "Tok_epsilon" + | Tok_prec -> "Tok_prec" + | Tok_left -> "Tok_left" + | Tok_right -> "Tok_right" + ) + + let malformations = function + | Tok_hocc | Tok_token | Tok_nonterm | Tok_start | Tok_epsilon | Tok_prec | Tok_left | Tok_right + -> [] +end + +module ConcreteToken = struct + type t = { + atok: AbstractToken.t; + source: Hmc.Source.Slice.t; + } + + let atok t = + t.atok + + let source t = + t.source + + let pp t formatter = + formatter + |> Fmt.fmt "{atok=" |> AbstractToken.pp t.atok + |> Fmt.fmt "; source=" |> Hmc.Source.Slice.pp t.source + |> Fmt.fmt "}" +end + +module Token = struct + type t = + | HmcToken of Hmc.Scan.ConcreteToken.t + | HoccToken of ConcreteToken.t + + let source = function + | HmcToken ctok -> Hmc.Scan.ConcreteToken.source ctok + | HoccToken ctok -> ConcreteToken.source ctok + + let pp t formatter = + match t with + | HmcToken ctok -> formatter |> Fmt.fmt "HmcToken " |> Hmc.Scan.ConcreteToken.pp ctok + | HoccToken ctok -> formatter |> Fmt.fmt "HoccToken " |> ConcreteToken.pp ctok + + let malformations = function + | HmcToken {atok; _} -> Hmc.Scan.AbstractToken.malformations atok + | HoccToken {atok; _} -> AbstractToken.malformations atok +end + +type t = { + scan: Hmc.Scan.t; + next: (t * Token.t) Lazy.t; +} + +let pp {scan; _} formatter = + Hmc.Scan.pp scan formatter + +let rec susp_next scan = lazy begin + let scan', ctok = Hmc.Scan.next scan in + let ctok' = match Hmc.Scan.ConcreteToken.atok ctok with + | Tok_uident (Constant uident) -> begin + let open AbstractToken in + let source = Hmc.Scan.ConcreteToken.source ctok in + match uident with + | "hocc" -> Token.HoccToken {atok=Tok_hocc; source} + | "token" -> Token.HoccToken {atok=Tok_token; source} + | "nonterm" -> Token.HoccToken {atok=Tok_nonterm; source} + | "start" -> Token.HoccToken {atok=Tok_start; source} + | "epsilon" -> Token.HoccToken {atok=Tok_epsilon; source} + | "prec" -> Token.HoccToken {atok=Tok_prec; source} + | "left" -> Token.HoccToken {atok=Tok_left; source} + | "right" -> Token.HoccToken {atok=Tok_right; source} + | _ -> Token.HmcToken ctok + end + | _ -> Token.HmcToken ctok + in + let t' = {scan=scan'; next=susp_next scan'} in + t', ctok' +end + +let init text = + let scan = Hmc.Scan.init text in + let next = susp_next scan in + {scan; next} + +let text {scan; _} = + Hmc.Scan.text scan + +let cursor {scan; _} = + Hmc.Scan.cursor scan + +let next {next; _} = + Lazy.force next diff --git a/bootstrap/bin/hocc/scan.mli b/bootstrap/bin/hocc/scan.mli new file mode 100644 index 000000000..a550a9226 --- /dev/null +++ b/bootstrap/bin/hocc/scan.mli @@ -0,0 +1,69 @@ +(** Thin wrapper around Hmc's scanner that adds hocc-specific keywords. *) + +open Basis +open! Basis.Rudiments + +module AbstractToken: sig + type t = + | Tok_hocc + | Tok_token + | Tok_nonterm + | Tok_start + | Tok_epsilon + | Tok_prec + | Tok_left + | Tok_right + + val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + val malformations: t -> Hmc.Scan.AbstractToken.Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) +end + +module ConcreteToken : sig + type t = { + atok: AbstractToken.t; + source: Hmc.Source.Slice.t; + } + + val atok: t -> AbstractToken.t + val source: t -> Hmc.Source.Slice.t + + include FormattableIntf.SMono with type t := t +end + +module Token: sig + type t = + | HmcToken of Hmc.Scan.ConcreteToken.t + | HoccToken of ConcreteToken.t + + val source: t -> Hmc.Source.Slice.t + + include FormattableIntf.SMono with type t := t + + val malformations: t -> Hmc.Scan.AbstractToken.Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) +end + +type t + +include FormattableIntf.SMono with type t := t + +val init: Text.t -> t +(** [init text] initializes scanner to scan [text]. *) + +val text: t -> Text.t +(** [text t] returns the source text for [t]. *) + +val cursor: t -> Hmc.Source.Cursor.t +(** [cursor t] returns the cursor at the scanner's current position. This cursor is equivalent to + the base of the token returned by [next t]. *) + +val next: t -> t * Token.t +(** [next t] scans the next token past the tokens scanned by [t]'s predecessor state(s) and returns + the scanner's successor state along with a token. If [t] is at the end of input, there is no + successor state, and [t, (HmcToken EndOfInput)] is returned. *) diff --git a/bootstrap/bin/hocc/spec.ml b/bootstrap/bin/hocc/spec.ml new file mode 100644 index 000000000..c22ece470 --- /dev/null +++ b/bootstrap/bin/hocc/spec.ml @@ -0,0 +1,252 @@ +open Basis +open! Basis.Rudiments + +type t = { + precs: Prec.t array; +} + +let string_of_token token = + Hmc.Source.Slice.to_string (Scan.Token.source token) + +let fold_hmh io ~precs_map hmh = + let rec fold_precs_tl io ~precs_map rels doms precs_tl = begin + match precs_tl with + | Parse.PrecsTlCommaUident {uident=Uident {uident}; precs_tl; _} -> begin + let name = string_of_token uident in + let rels = match Set.mem name rels with + | true -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Redundant relation to precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | false -> Set.insert name rels + in + let doms = match Map.get name precs_map with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Relation to undefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Prec.{index; doms=rel_doms; _} -> Ordset.insert index doms |> Ordset.union rel_doms + in + fold_precs_tl io ~precs_map rels doms precs_tl + end + | PrecsTlEpsilon -> io, doms + end in + let fold_precs io ~precs_map precs = begin + match precs with + | Parse.Precs {uident=Uident {uident}; precs_tl} -> begin + let name = string_of_token uident in + let rels = Set.singleton (module String) name in + let doms = match Map.get name precs_map with + | None -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Relation to undefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | Some Prec.{index; doms; _} -> Ordset.insert index doms + in + fold_precs_tl io ~precs_map rels doms precs_tl + end + end in + let fold_prec io ~precs_map prec : (Io.t * (string, Prec.t, String.cmper_witness) Map.t) = begin + match prec with + | Parse.Prec {prec_type; uident=Uident {uident}; prec_rels} -> begin + let index = Map.length precs_map in + let name = string_of_token uident in + let assoc = match prec_type with + | PrecTypePrec _ -> None + | PrecTypeLeft _ -> Some Assoc.Left + | PrecTypeRight _ -> Some Assoc.Right + in + let io, doms = match prec_rels with + | PrecRelsLtPrecs {precs; _} -> fold_precs io ~precs_map precs + | PrecRelsEpsilon -> io, Ordset.empty (module Uns) + in + let p = Prec.init ~index ~name ~assoc ~doms ~stmt:prec in + let precs_map = match Map.mem name precs_map with + | true -> begin + let io = + io.err + |> Fmt.fmt "hocc: At " |> Hmc.Source.Slice.pp (Scan.Token.source uident) + |> Fmt.fmt ": Redefined precedence: " |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | false -> Map.insert_hlt ~k:name ~v:p precs_map + in + io, precs_map + end + end in + let fold_stmt io ~precs_map stmt = begin + match stmt with + | Parse.StmtPrec {prec} -> fold_prec io ~precs_map prec + | _ -> io, precs_map + end in + let rec fold_stmts_tl io ~precs_map stmts_tl = begin + match stmts_tl with + | Parse.StmtsTl {stmt; stmts_tl; _} -> begin + let io, precs_map = fold_stmt io ~precs_map stmt in + fold_stmts_tl io ~precs_map stmts_tl + end + | StmtsTlEpsilon -> io, precs_map + end in + let fold_stmts io ~precs_map stmts = begin + match stmts with + | Parse.Stmts {stmt; stmts_tl} -> begin + let io, precs_map = fold_stmt io ~precs_map stmt in + fold_stmts_tl io ~precs_map stmts_tl + end + end in + match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> fold_stmts io ~precs_map stmts + +let init io hmh = + let io = io.log |> Fmt.fmt "hocc: Generating specification\n" |> Io.with_log io in + let io, precs_map = fold_hmh io ~precs_map:(Map.empty (module String)) hmh in + let precs_kvpairs = + Map.to_array precs_map + |> Array.sort ~cmp:(fun (_, Prec.{index=a; _}) (_, {index=b; _}) -> Uns.cmp a b) in + let precs = Array.init (Array.range precs_kvpairs) ~f:(fun i -> + match Array.get i precs_kvpairs with + | (_, prec) -> prec + ) in + io, {precs} + +let to_txt conf io t = + let io = io.log |> Fmt.fmt "hocc: Generating text report\n" |> Io.with_log io in + io.txt + |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) |> Fmt.fmt "\n\n" + |> Fmt.fmt "Precedences\n" + |> (fun formatter -> + Array.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; doms; _} -> + formatter + |> Fmt.fmt (match assoc with + | None -> " prec " + | Some Left -> " left " + | Some Right -> " right " + ) + |> Fmt.fmt name + |> (fun formatter -> + match Ordset.is_empty doms with + | true -> formatter + | false -> begin + let _, formatter = Ordset.fold ~init:(true, formatter) + ~f:(fun (first, formatter) prec_ind -> + let formatter = + formatter + |> Fmt.fmt (match first with + | true -> " < " + | false -> ", " + ) + |> Fmt.fmt (Array.get prec_ind t.precs).name + in + (false, formatter) + ) doms + in + formatter + end + ) + |> Fmt.fmt "\n" + ) t.precs + ) + |> Io.with_txt io + +let to_html conf io t = + let io = io.log |> Fmt.fmt "hocc: Generating html report\n" |> Io.with_log io in + io.html + |> Fmt.fmt "\n" + |> Fmt.fmt "

" |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) |> Fmt.fmt "

\n" + |> Fmt.fmt "

Precedences

\n" + |> Fmt.fmt "\n" + |> Fmt.fmt "
    \n" + |> (fun formatter -> + Array.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; doms; _} -> + formatter + |> Fmt.fmt "
  • " + |> Fmt.fmt (match assoc with + | None -> "prec" + | Some Left -> "left" + | Some Right -> "right" + ) + |> Fmt.fmt " Fmt.fmt name |> Fmt.fmt "\">" + |> Fmt.fmt name + |> Fmt.fmt "" + |> (fun formatter -> + match Ordset.is_empty doms with + | true -> formatter + | false -> begin + let _, formatter = Ordset.fold ~init:(true, formatter) + ~f:(fun (first, formatter) prec_ind -> + let ref_name = (Array.get prec_ind t.precs).name in + let formatter = + formatter + |> Fmt.fmt (match first with + | true -> " < " + | false -> ", " + ) + |> Fmt.fmt " Fmt.fmt ref_name |> Fmt.fmt "\">" + |> Fmt.fmt ref_name + |> Fmt.fmt "" + in + (false, formatter) + ) doms + in + formatter + end + ) + |> Fmt.fmt "
  • \n" + ) t.precs + ) + |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> Io.with_html io + +let to_hocc io t = + let io = io.log |> Fmt.fmt "hocc: Generating hocc report\n" |> Io.with_log io in + io.hocc + |> Fmt.fmt "hocc\n" + |> (fun formatter -> + Array.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; stmt; _} -> + formatter + |> Fmt.fmt (match assoc with + | None -> " prec " + | Some Left -> " left " + | Some Right -> " right " + ) + |> Fmt.fmt name + |> (fun formatter -> + match stmt with + | Prec {prec_rels=PrecRelsLtPrecs {precs=Precs {uident=Uident {uident}; precs_tl}; _}; _} -> + begin + let rec fmt_precs_tl precs_tl formatter = begin + match precs_tl with + | Parse.PrecsTlCommaUident {uident=Uident {uident}; precs_tl; _} -> begin + formatter + |> Fmt.fmt ", " |> Fmt.fmt (string_of_token uident) + |> fmt_precs_tl precs_tl + end + | PrecsTlEpsilon -> formatter + end in + formatter + |> Fmt.fmt " < " |> Fmt.fmt (string_of_token uident) + |> fmt_precs_tl precs_tl + end + | Prec {prec_rels=PrecRelsEpsilon; _} -> formatter + ) + |> Fmt.fmt "\n" + ) t.precs + ) + |> Io.with_hocc io diff --git a/bootstrap/bin/hocc/spec.mli b/bootstrap/bin/hocc/spec.mli new file mode 100644 index 000000000..f1c2fe96b --- /dev/null +++ b/bootstrap/bin/hocc/spec.mli @@ -0,0 +1,12 @@ +open! Basis +open! Basis.Rudiments + +type t = { + precs: Prec.t array; +} + +val init: Io.t -> Parse.hmh -> Io.t * t + +val to_txt: Conf.t -> Io.t -> t -> Io.t +val to_html: Conf.t -> Io.t -> t -> Io.t +val to_hocc: Io.t -> t -> Io.t diff --git a/bootstrap/src/hmc/scan.ml b/bootstrap/src/hmc/scan.ml index 7c302ccf7..4baf742d7 100644 --- a/bootstrap/src/hmc/scan.ml +++ b/bootstrap/src/hmc/scan.ml @@ -576,6 +576,104 @@ module AbstractToken = struct formatter |> Fmt.fmt "Tok_error " |> (List.pp Rendition.Malformation.pp) mals ) |> Fmt.fmt ">" + + let malformations = function + (* Keywords. *) + | Tok_and | Tok_also | Tok_as | Tok_conceal | Tok_effect | Tok_else | Tok_expose | Tok_external + | Tok_false | Tok_fn | Tok_function | Tok_if | Tok_import | Tok_include | Tok_lazy | Tok_let + | Tok_match | Tok_mutability | Tok_of | Tok_open | Tok_or | Tok_rec | Tok_then | Tok_true + | Tok_type | Tok_when | Tok_with + (* Operators. *) + | Tok_tilde_op _ | Tok_qmark_op _ | Tok_star_star_op _ | Tok_star_op _ | Tok_slash_op _ + | Tok_pct_op _ | Tok_plus_op _ | Tok_minus_op _ | Tok_at_op _ | Tok_caret_op _ | Tok_dollar_op _ + | Tok_lt_op _ | Tok_eq_op _ | Tok_gt_op _ | Tok_bar_op _ | Tok_colon_op _ | Tok_dot_op _ + (* Punctuation. *) + | Tok_tilde | Tok_qmark | Tok_minus | Tok_lt | Tok_lt_eq | Tok_eq | Tok_lt_gt | Tok_gt_eq + | Tok_gt | Tok_comma | Tok_dot | Tok_dot_dot | Tok_semi | Tok_colon | Tok_colon_colon + | Tok_colon_eq | Tok_lparen | Tok_rparen | Tok_lbrack | Tok_rbrack | Tok_lcurly | Tok_rcurly + | Tok_bar | Tok_lcapture | Tok_rcapture | Tok_larray | Tok_rarray | Tok_bslash | Tok_tick + | Tok_caret | Tok_amp | Tok_xmark | Tok_arrow | Tok_carrow + (* Miscellaneous. *) + | Tok_source_directive (Constant _) + | Tok_line_delim + | Tok_indent (Constant _) + | Tok_dedent (Constant _) + | Tok_whitespace|Tok_hash_comment + | Tok_paren_comment (Constant _) + | Tok_uscore + | Tok_uident (Constant _) + | Tok_cident _ + | Tok_codepoint (Constant _) + | Tok_rstring (Constant _) + | Tok_istring (Constant _) + | Tok_fstring_lditto + | Tok_fstring_interpolated (Constant _) + | Tok_fstring_pct + | Tok_fstring_pad (Constant _) + | Tok_fstring_just _ | Tok_fstring_sign _ | Tok_fstring_alt | Tok_fstring_zpad + | Tok_fstring_width_star + | Tok_fstring_width (Constant _) + | Tok_fstring_pmode _ | Tok_fstring_precision_star + | Tok_fstring_precision (Constant _) + | Tok_fstring_radix _ | Tok_fstring_notation _ | Tok_fstring_pretty + | Tok_fstring_fmt (Constant _) + | Tok_fstring_sep (Constant _) + | Tok_fstring_label _ | Tok_fstring_lparen_caret | Tok_fstring_caret_rparen | Tok_fstring_rditto + | Tok_r32 (Constant _) + | Tok_r64 (Constant _) + | Tok_u8 (Constant _) + | Tok_i8 (Constant _) + | Tok_u16 (Constant _) + | Tok_i16 (Constant _) + | Tok_u32 (Constant _) + | Tok_i32 (Constant _) + | Tok_u64 (Constant _) + | Tok_i64 (Constant _) + | Tok_u128 (Constant _) + | Tok_i128 (Constant _) + | Tok_u256 (Constant _) + | Tok_i256 (Constant _) + | Tok_u512 (Constant _) + | Tok_i512 (Constant _) + | Tok_nat (Constant _) + | Tok_zint (Constant _) + | Tok_end_of_input | Tok_misaligned + -> [] + (* Malformations. *) + | Tok_source_directive (Malformed mals) + | Tok_indent (Malformed mals) + | Tok_dedent (Malformed mals) + | Tok_paren_comment (Malformed mals) + | Tok_uident (Malformed mals) + | Tok_codepoint (Malformed mals) + | Tok_rstring (Malformed mals) + | Tok_istring (Malformed mals) + | Tok_fstring_interpolated (Malformed mals) + | Tok_fstring_pad (Malformed mals) + | Tok_fstring_width (Malformed mals) + | Tok_fstring_precision (Malformed mals) + | Tok_fstring_fmt (Malformed mals) + | Tok_fstring_sep (Malformed mals) + | Tok_r32 (Malformed mals) + | Tok_r64 (Malformed mals) + | Tok_u8 (Malformed mals) + | Tok_i8 (Malformed mals) + | Tok_u16 (Malformed mals) + | Tok_i16 (Malformed mals) + | Tok_u32 (Malformed mals) + | Tok_i32 (Malformed mals) + | Tok_u64 (Malformed mals) + | Tok_i64 (Malformed mals) + | Tok_u128 (Malformed mals) + | Tok_i128 (Malformed mals) + | Tok_u256 (Malformed mals) + | Tok_i256 (Malformed mals) + | Tok_u512 (Malformed mals) + | Tok_i512 (Malformed mals) + | Tok_nat (Malformed mals) + | Tok_zint (Malformed mals) + | Tok_error mals + -> mals end module ConcreteToken = struct @@ -812,6 +910,9 @@ let view_of_t t = let text t = Source.(text (Cursor.container t.tok_base)) +let cursor {tok_base; _} = + tok_base + let str_of_cursor cursor t = Source.Slice.to_string (Source.Slice.of_cursors ~base:t.tok_base ~past:cursor) diff --git a/bootstrap/src/hmc/scan.mli b/bootstrap/src/hmc/scan.mli index 6c6d41eeb..564706a88 100644 --- a/bootstrap/src/hmc/scan.mli +++ b/bootstrap/src/hmc/scan.mli @@ -184,6 +184,7 @@ module AbstractToken : sig | Tok_arrow | Tok_carrow + (* Miscellaneous. *) | Tok_source_directive of source_directive Rendition.t | Tok_line_delim | Tok_indent of unit Rendition.t @@ -242,6 +243,11 @@ module AbstractToken : sig | Tok_error of Rendition.Malformation.t list val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + val malformations: t -> Rendition.Malformation.t list + (** [malformations t] returns a list of malformations associated with [t], or an empty list if + there are no malformations. This function can be used on any token variant, even if no + malformations are possible. *) end (** Concrete tokens augment abstract tokens with source locations. *) @@ -259,12 +265,18 @@ end type t +include FormattableIntf.SMono with type t := t + val init: Text.t -> t (** [init text] initializes scanner to scan [text]. *) val text: t -> Text.t (** [text t] returns the source text for [t]. *) +val cursor: t -> Source.Cursor.t +(** [cursor t] returns the cursor at the scanner's current position. This cursor is equivalent to + the base of the token returned by [next t]. *) + val next: t -> t * ConcreteToken.t (** [next t] scans the next token past the tokens scanned by [t]'s predecessor state(s) and returns the scanner's successor state along with a token. If [t] is at the end of input, there is no diff --git a/bootstrap/test/basis/seed/test_seed0.ml b/bootstrap/test/basis/seed/test_seed0.ml index f8e1b1e91..ad4899332 100644 --- a/bootstrap/test/basis/seed/test_seed0.ml +++ b/bootstrap/test/basis/seed/test_seed0.ml @@ -3,7 +3,7 @@ open Basis let () = File.Fmt.stdout |> Fmt.fmt "HEMLOCK_ENTROPY=" - |> String.pp (Sys.getenv "HEMLOCK_ENTROPY") + |> String.pp (Stdlib.Sys.getenv "HEMLOCK_ENTROPY") |> Fmt.fmt " -> seed=" |> Hash.State.pp Hash.State.seed |> Fmt.fmt "\n" diff --git a/bootstrap/test/basis/seed/test_seed42.ml b/bootstrap/test/basis/seed/test_seed42.ml index f8e1b1e91..ad4899332 100644 --- a/bootstrap/test/basis/seed/test_seed42.ml +++ b/bootstrap/test/basis/seed/test_seed42.ml @@ -3,7 +3,7 @@ open Basis let () = File.Fmt.stdout |> Fmt.fmt "HEMLOCK_ENTROPY=" - |> String.pp (Sys.getenv "HEMLOCK_ENTROPY") + |> String.pp (Stdlib.Sys.getenv "HEMLOCK_ENTROPY") |> Fmt.fmt " -> seed=" |> Hash.State.pp Hash.State.seed |> Fmt.fmt "\n" diff --git a/bootstrap/test/hocc/A.expected b/bootstrap/test/hocc/A.expected new file mode 100644 index 000000000..d562faf97 --- /dev/null +++ b/bootstrap/test/hocc/A.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./A.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/A.hmh b/bootstrap/test/hocc/A.hmh new file mode 100644 index 000000000..5efa06634 --- /dev/null +++ b/bootstrap/test/hocc/A.hmh @@ -0,0 +1,22 @@ +hocc + left p1 + left p2 < p1 + + token PLUS + token STAR + token LPAREN + token RPAREN prec p1 + token ID + token EOI + + start E ::= + | E PLUS T EOI prec p2 + | T EOI + + nonterm T ::= + | T STAR F + | F + + nonterm F prec p1 ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/B.expected b/bootstrap/test/hocc/B.expected new file mode 100644 index 000000000..c8be70f0e --- /dev/null +++ b/bootstrap/test/hocc/B.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./B.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/B.hmh b/bootstrap/test/hocc/B.hmh new file mode 100644 index 000000000..59ad744b7 --- /dev/null +++ b/bootstrap/test/hocc/B.hmh @@ -0,0 +1,19 @@ +hocc + token PLUS + token STAR + token LPAREN + token RPAREN + token ID + token EOI + + start E ::= + | E PLUS T EOI + | T EOI + + nonterm T ::= + | T STAR F + | F + + nonterm F ::= + | LPAREN E RPAREN + | ID diff --git a/bootstrap/test/hocc/C.expected b/bootstrap/test/hocc/C.expected new file mode 100644 index 000000000..6f9351b77 --- /dev/null +++ b/bootstrap/test/hocc/C.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./C.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/C.hmh b/bootstrap/test/hocc/C.hmh new file mode 100644 index 000000000..68d76f532 --- /dev/null +++ b/bootstrap/test/hocc/C.hmh @@ -0,0 +1,15 @@ +hocc + token EQUAL + token STAR + token ID + token EOI + + start S ::= + | L EQUAL R EOI + | R EOI + + nonterm L ::= + | STAR R + | ID + + nonterm R ::= L diff --git a/bootstrap/test/hocc/D.expected b/bootstrap/test/hocc/D.expected new file mode 100644 index 000000000..8ad584a5b --- /dev/null +++ b/bootstrap/test/hocc/D.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./D.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/D.hmh b/bootstrap/test/hocc/D.hmh new file mode 100644 index 000000000..bfdacc522 --- /dev/null +++ b/bootstrap/test/hocc/D.hmh @@ -0,0 +1,15 @@ +hocc + left p1 + left p2 < p1 + + token STAR "*" prec p1 + token PLUS "+" prec p2 + token ID + token EOI + + start S ::= E EOI + + nonterm E ::= + | ID + | E "*" E prec p1 + | E "+" E prec p2 diff --git a/bootstrap/test/hocc/E.expected b/bootstrap/test/hocc/E.expected new file mode 100644 index 000000000..4589e8d47 --- /dev/null +++ b/bootstrap/test/hocc/E.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./E.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/E.hmh b/bootstrap/test/hocc/E.hmh new file mode 100644 index 000000000..2a343971c --- /dev/null +++ b/bootstrap/test/hocc/E.hmh @@ -0,0 +1,10 @@ +hocc + token C + token D + token EOI + + start S ::= N N EOI + + nonterm N ::= + | C N + | D diff --git a/bootstrap/test/hocc/Example1.expected b/bootstrap/test/hocc/Example1.expected new file mode 100644 index 000000000..d44dc0d37 --- /dev/null +++ b/bootstrap/test/hocc/Example1.expected @@ -0,0 +1,3 @@ +hocc: Parsing "./Example1.hmhi" +hocc: Parsing "./Example1.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/Example1.hmh b/bootstrap/test/hocc/Example1.hmh new file mode 100644 index 000000000..e4ecf7fc6 --- /dev/null +++ b/bootstrap/test/hocc/Example1.hmh @@ -0,0 +1,62 @@ +open import Basis + +# Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. +include hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr -> + match op with + | MulOp STAR -> Zint.(e0 * e1) + | MulOp SLASH -> Zint.(e0 / e1) + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | AddOp PLUS -> Zint.(e0 + e1) + | AddOp MINUS -> Zint.(e0 - e1) + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> String.length s <> 0) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let parser' = Start.Answer.next tok parser + let done = match status parser' with + | Prefix -> false + | Accept _ + | Error _ -> true + parser', done + |> function + | Accept answer -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" diff --git a/bootstrap/test/hocc/Example1.hmhi b/bootstrap/test/hocc/Example1.hmhi new file mode 100644 index 000000000..b49400610 --- /dev/null +++ b/bootstrap/test/hocc/Example1.hmhi @@ -0,0 +1,9 @@ +open import Basis + +# Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a +# module signature. +include hocc + +calulate: string -> zint + [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers + and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."] diff --git a/bootstrap/test/hocc/F.expected b/bootstrap/test/hocc/F.expected new file mode 100644 index 000000000..7f00f5d5e --- /dev/null +++ b/bootstrap/test/hocc/F.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./F.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/F.hmh b/bootstrap/test/hocc/F.hmh new file mode 100644 index 000000000..41380461c --- /dev/null +++ b/bootstrap/test/hocc/F.hmh @@ -0,0 +1,16 @@ +hocc + token A + token B + token C + token D + token E + + start S ::= + | A M D + | B N D + | A N E + | B M E + + nonterm M ::= C + + nonterm N ::= C diff --git a/bootstrap/test/hocc/Hocc.expected b/bootstrap/test/hocc/Hocc.expected new file mode 100644 index 000000000..8da38df6a --- /dev/null +++ b/bootstrap/test/hocc/Hocc.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Hocc.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/Hocc.hmh b/bootstrap/test/hocc/Hocc.hmh new file mode 100644 index 000000000..7f1fc5f37 --- /dev/null +++ b/bootstrap/test/hocc/Hocc.hmh @@ -0,0 +1,177 @@ +hocc + # hocc-specific keywords + token HOCC "hocc" + token NONTERM "nonterm" + token EPSILON "epsilon" + token START "start" + token PREC "prec" + token LEFT "left" + token RIGHT "right" + + # Identifiers + token UIDENT # Uncapitalized + token CIDENT # Capitalized + token USCORE "_" + + # Token alias + token STRING + + # Punctuation/separators + token COLON_COLON_EQ "::=" + token OF "of" + token DOT "." + token ARROW "->" + token BAR "|" + token LT "<" + token COMMA "," + token SEMI ";" + token LINE_DELIM + + # Left-right paired delimiters + token INDENT + token DEDENT + token LPAREN "(" + token RPAREN ")" + token LCAPTURE "(|" + token RCAPTURE "|)" + token LBRACK "[" + token RBRACK "]" + token LARRAY "[|" + token RARRAY "|]" + token LCURLY "{" + token RCURLY "}" + + # Miscellaneous Hemlock token in embedded code + token CODE_TOKEN + + # End of input, used to terminate start symbols + token EOI + + nonterm Ident ::= UIDENT | CIDENT | "_" + + nonterm PrecsTl ::= + | "," UIDENT PrecsTl + | epsilon + + nonterm Precs ::= UIDENT PrecsTl + + nonterm PrecRels ::= + | "<" Precs + | epsilon + + nonterm PrecType ::= "prec" | "left" | "right" + + nonterm Prec ::= PrecType UIDENT PrecRels + + nonterm OfType ::= "of" CIDENT "." UIDENT + + nonterm OfType0 ::= + | OfType + | epsilon + + nonterm PrecRef ::= + | "prec" UIDENT + | epsilon + + nonterm TokenAlias ::= + | STRING + | epsilon + + nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + + nonterm Sep ::= LINE_DELIM | ";" | "|" + + nonterm CodesTl ::= + | Sep Code CodesTl + | epsilon + + nonterm Codes ::= Code CodesTl + + nonterm Codes0 ::= + | Codes + | epsilon + + nonterm Delimited ::= + | INDENT Codes DEDENT + | "(" Codes0 ")" + | "(|" Codes0 "|)" + | "[" Codes0 "]" + | "[|" Codes0 "|]" + | "{" Codes0 "}" + + nonterm CodeTl ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + | epsilon + + nonterm Code ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + + nonterm ProdParamType ::= + | CIDENT + | STRING + + nonterm ProdParamIdent ::= + | Ident ":" + | epsilon + + nonterm ProdParam ::= ProdParamIdent ProdParamType + + nonterm ProdParamsTl ::= + | ProdParam ProdParamsTl + | epsilon + + nonterm ProdParams ::= ProdParam ProdParamsTl + + nonterm ProdPattern ::= + | ProdParams + | "epsilon" + + nonterm Prod ::= ProdPattern PrecRef + + nonterm ProdsTl ::= + | "|" Prod ProdsTl + | epsilon + + nonterm Prods ::= + | "|" Prod ProdsTl + | Prod ProdsTl + + nonterm Reduction ::= Prods "->" Code + + nonterm ReductionsTl ::= + | "|" Reduction ReductionsTl + | epsilon + + nonterm Reductions ::= + | "|" Reduction ReductionsTl + | Reduction ReductionsTl + + nonterm NontermType ::= "nonterm" | "start" + + nonterm Nonterm ::= + | NontermType CIDENT OfType PrecRef "::=" Reductions + | NontermType CIDENT PrecRef "::=" Prods + + nonterm Stmt ::= + | Prec + | Token + | Nonterm + | Code + + nonterm StmtsTl ::= + | LINE_DELIM Stmt StmtsTl + | epsilon + + nonterm Stmts ::= Stmt StmtsTl + + nonterm Hocc ::= "hocc" INDENT Stmts DEDENT + + nonterm Matter ::= + | CODE_TOKEN Matter + | epsilon + + start Hmh ::= Matter Hocc Matter EOI + + start Hmhi ::= Matter "hocc" Matter EOI diff --git a/bootstrap/test/hocc/Lyken.expected b/bootstrap/test/hocc/Lyken.expected new file mode 100644 index 000000000..2436e948f --- /dev/null +++ b/bootstrap/test/hocc/Lyken.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Lyken.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/Lyken.hmh b/bootstrap/test/hocc/Lyken.hmh new file mode 100644 index 000000000..a58ec3c99 --- /dev/null +++ b/bootstrap/test/hocc/Lyken.hmh @@ -0,0 +1,1258 @@ +hocc + ################################################################################################ + # Precedences. + + prec pGenericParamOne + left pColon < pGenericParamOne + + prec pProcParmBody + left pRparen < pProcParmBody + prec pList < pRparen, pColon + + prec pId + prec pOptionalGenericParamList < pId + left pLparen < pOptionalGenericParamList, pList + left pRarrow < pOptionalGenericParamList + + prec pProcParmRequired2 + prec pProcParmRequired1 < pProcParmRequired2 + + prec pTypeName2 + prec pVarRestId + left pDot < pTypeName2, pVarRestId, pOptionalGenericParamList + left pLbrace + left pLbracket < pVarRestId, pOptionalGenericParamList + prec pThis < pDot, pLbrace, pLbracket + + prec pUnaryExpr2 < pDot, pLbracket, pLparen + right pPowOp < pDot, pLbracket, pLparen, pUnaryExpr2 + left pMulOp < pPowOp + left pPlusOp < pMulOp + + prec pLvalListBody < pLbracket, pPlusOp + left pEq < pLvalListBody + prec pDoWhileExpr2 + + left pCmpOp1 < pPlusOp, pLvalListBody, pDoWhileExpr2 + prec pAssnExpr1 < pCmpOp1 + + prec pExprListBodyA < pCmpOp1, pDot, pLbracket, pLparen + prec pExprListBodyB < pLbracket, pCmpOp1, pPlusOp + + prec pDictElm + prec pProcParmRequiredList + prec pProcParmOptional + prec pInitFieldList + prec pCallNamed + prec pCallPargs + prec pCallThis + left pComma < pDictElm, pProcParmRequiredList, pProcParmOptional, pInitFieldList, pCallNamed, + pCallPargs, pCallThis, pLvalListBody, pUnaryExpr2, pPlusOp, pAssnExpr1, pExprListBodyA, + pExprListBodyB + + left pCmpOp2 < pCmpOp1, pComma, pExprListBodyA + prec pUnaryExpr1 < pDot, pCmpOp2 + prec pAssnExpr2 < pCmpOp2, pDot, pLparen, pPlusOp + left pCmpOp3 < pCmpOp2, pUnaryExpr1, pAssnExpr1, pAssnExpr2 + + prec pAssnExpr3 + + left pCmpOp4 < pCmpOp3 + left pCmpOp5 < pCmpOp4 + + prec pTypeNameSuffixElm < pDot + prec pTypeNameSuffix1 < pDot, pLbracket + prec pTypeName1 < pTypeNameSuffix1 + prec pLvalPrefix1 < pDot, pLbracket + prec pLvalPrefix2 < pLbracket + prec pLvalSuffix < pTypeNameSuffix1 + prec pLval1 < pDot + prec pLval2 < pRparen + prec pAnnotations2 + prec pAnnotations1 < pAnnotations2 + prec pImplements + prec pImplementsInterface < pImplements + prec pImportModuleRelpath < pPowOp + prec pImportModuleName < pDot + prec pImportVars < pComma + prec pImportLval < pComma + prec pVar1 < pDot + prec pVar2 < pVar1 + prec pElseClause < pCmpOp2 + prec pDoWhileExpr1 < pCmpOp5, pComma, pDot, pLbracket, pLparen, pCmpOp2 + prec pAssertExpr < pCmpOp5, pComma + left pRdangle + prec pNondelimitedExpr1 < pRdangle + prec pNondelimitedExpr2 < pRparen + prec pLvalSubscript + prec pExprSlice < pLvalSubscript + prec pExprSuffix < pLbracket, pLvalSuffix + + prec pReturnStmt1 + prec pThrowStmt1 + prec pAssnExprLeft4 + prec pAssnExprLeft2 < pComma + prec pAssnExprLeft1 < pAssnExprLeft2, pAssnExprLeft4 + prec pAssnExprLeft3 < pReturnStmt1, pThrowStmt1, pComma, pRparen + prec pAssnExprLeft5 < pComma + + prec pExpr1 + prec pStmtList < pExpr1 + prec pExpr2 < pDot + prec pStmt + left pRbrace < pStmt + prec pExprList1 < pComma, pRparen, pRbrace + prec pExprList2 < pAssnExpr3, pPlusOp, pPowOp, pMulOp + + left pIs + left pBar + left pLdangle + left pRbracket + + ################################################################################################ + # Tokens. + + token BOI + token INT + token FLOAT + token STR + token BUF + token CBLOCK + token ID prec pId + token COMMENT + token WHITESPACE + + # Keywords. + token AND prec pCmpOp3 + token ASSERT + token ATTR + token BLANK + token BREAK + token CATCH + token CLASS + token CONST + token CONTINUE + token DEBUG + token DO + token ELIF + token ELSE + token ENUM + token EXTENDS + token FALSE + token FILE + token FINAL + token FOR + token FROM + token GUARD + token IF + token IMPLEMENTS prec pImplements + token IMPORT + token IN prec pCmpOp2 + token INF + token INIT + token INTERFACE + token IS prec pIs + token LINE + token MEMBER + token METH + token MODULE + token NAN + token NOT prec pCmpOp2 + token NULL + token OR prec pCmpOp5 + token PRELUDE + token PRIVATE + token PROC + token PROTECTED + token PUBLIC + token PURE + token RETURN + token SELECT + token STATIC + token THIS prec pThis + token THROW + token TRUE + token VAR + token VIRTUAL + token WHERE + token WHILE + token XOR prec pCmpOp4 + + # Symbols. + token POW prec pPowOp + + token MUL prec pMulOp + token DIV prec pMulOp + token MOD prec pMulOp + + token PLUS prec pPlusOp + token MINUS prec pPlusOp + + token EQEQ prec pCmpOp2 + token NOTEQ prec pCmpOp2 + token EQEQEQ prec pCmpOp2 + token NOTEQEQ prec pCmpOp2 + + token LT prec pCmpOp1 + token LE prec pCmpOp1 + token GT prec pCmpOp1 + token GE prec pCmpOp1 + + token EQ prec pEq + + token PLUSEQ + token MINUSEQ + token MULEQ + token DIVEQ + token MODEQ + token POWEQ + + token BAR prec pBar + token RARROW prec pRarrow + token COMMA prec pComma + token LPAREN prec pLparen + token LBRACE prec pLbrace + token LBRACKET prec pLbracket + token LDANGLE prec pLdangle + token COLON prec pColon + token SEMICOLON + token RPAREN prec pRparen + token RBRACE prec pRbrace + token RBRACKET prec pRbracket + token RDANGLE prec pRdangle + token DOT prec pDot + + ################################################################################################ + # Non-terminals. + + nonterm SliceTerm ::= + | Expr + | Lval + | epsilon + + nonterm Slice ::= + | SliceTerm COLON SliceTerm COLON SliceTerm + | SliceTerm COLON SliceTerm + | Expr + | Lval + + nonterm ParamTypeList ::= + | TypeSpec + | ParamTypeList COMMA TypeSpec + + nonterm DerivationConstraint ::= + | ID EXTENDS TypeName ImplementsInterface + | ID IMPLEMENTS InterfaceList + + nonterm DerivationConstraintList ::= + | DerivationConstraint + | DerivationConstraintList AND DerivationConstraint + + nonterm GenericParamOne ::= + | LDANGLE TypeSpec RDANGLE + | LDANGLE TypeSpec WHERE DerivationConstraint RDANGLE + | epsilon prec pGenericParamOne + + nonterm GenericParamTwo ::= + | LDANGLE TypeSpec COMMA TypeSpec RDANGLE + | LDANGLE TypeSpec COMMA TypeSpec WHERE DerivationConstraintList RDANGLE + | epsilon + + nonterm OptionalGenericParamList prec pOptionalGenericParamList ::= + | LDANGLE ParamTypeList RDANGLE + | LDANGLE ParamTypeList WHERE DerivationConstraintList RDANGLE + | epsilon + + nonterm TypeNameSuffixElm ::= + | ID OptionalGenericParamList prec pTypeNameSuffixElm + + nonterm TypeNameSuffix ::= + | TypeNameSuffixElm prec pTypeNameSuffix1 + | TypeNameSuffix DOT TypeNameSuffixElm + + nonterm TypeName ::= + | ID OptionalGenericParamList prec pTypeName1 + | ID OptionalGenericParamList DOT TypeNameSuffix prec pTypeName2 + | TypeName DOT TypeNameSuffix prec pTypeName2 + + nonterm Proc ::= + | PROC + | METH + + nonterm ProtoType ::= + | Proc OptionalGenericParamList + | Proc OptionalGenericParamList RARROW LPAREN RPAREN + | Proc OptionalGenericParamList RARROW LPAREN ProcRetBody RPAREN + + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN ProcRetBody RPAREN + + nonterm TypeSpec ::= + | TypeName + | TypeSpec BAR TypeName + + | ProtoType + | TypeSpec BAR ProtoType + + nonterm Blank ::= + | BLANK + + nonterm LvalSubscript prec pLvalSubscript ::= + | LBRACKET Slice RBRACKET + | LBRACKET RBRACKET + + nonterm LvalPrefix ::= + # TypeName-conforming syntax; may not actually refer to type name. + | TypeName prec pLvalPrefix1 + | TypeName LvalSubscript + + | LPAREN ImportLval RPAREN prec pLvalPrefix2 + | LPAREN ImportLval RPAREN LvalSubscript + + | THIS prec pThis + | THIS LvalSubscript + + nonterm LvalSuffix ::= + | ID prec pLvalSuffix + | ID LvalSubscript + + | LvalSuffix DOT ID prec pLvalSuffix + | LvalSuffix DOT ID LvalSubscript + + nonterm Lval ::= + | ImportLval prec pLval2 + | Blank + | LvalPrefix prec pLval1 + | LvalPrefix DOT LvalSuffix prec pLval1 + | TypeName DOT LvalSuffix prec pLval1 + | LPAREN Lval RPAREN + + nonterm LvalListBody prec pLvalListBody ::= + | Annotations Var COMMA Lval + | Var COMMA Lval + | Lval COMMA Lval + | Lval COMMA Annotations Var + | Lval COMMA Var + | Annotations Var COMMA Var + | Var COMMA Var + + | LvalListBody COMMA Annotations Var + | LvalListBody COMMA Var + | LvalListBody COMMA Lval + + nonterm DictElm prec pDictElm ::= + | Expr COLON Expr + | Expr COLON Lval + | Lval COLON Expr + | Lval COLON Lval + nonterm DictList ::= + | DictElm + | DictList COMMA DictElm + nonterm Dict ::= + | LBRACE COLON RBRACE GenericParamTwo + | LBRACE DictList RBRACE GenericParamTwo + + nonterm List ::= + | LBRACKET Expr RBRACKET GenericParamOne + | LBRACKET Lval RBRACKET GenericParamOne + | LBRACKET ExprListBody RBRACKET GenericParamOne + | LBRACKET RBRACKET GenericParamOne prec pList + + nonterm Annotation ::= + | LDANGLE Expr RDANGLE + | LDANGLE Lval RDANGLE + | LDANGLE ExprListBody RDANGLE + | LDANGLE Str RDANGLE + | CONST + | FINAL + | PRELUDE + | PRIVATE + | PROTECTED + | PUBLIC + | PURE + | STATIC + | VIRTUAL + | LDANGLE RDANGLE + nonterm Annotations ::= + | Annotation prec pAnnotations1 + | Annotations Annotation prec pAnnotations2 + + nonterm ProcParmRequired ::= + | Annotations TypeSpec ID prec pProcParmRequired2 + | TypeSpec ID prec pProcParmRequired1 + | Annotations ID prec pProcParmRequired2 + | ID prec pProcParmRequired1 + | Annotations TypeSpec BLANK prec pProcParmRequired2 + | TypeSpec BLANK prec pProcParmRequired1 + | Annotations BLANK prec pProcParmRequired2 + | BLANK prec pProcParmRequired1 + nonterm ProcParmRequiredList ::= + | ProcParmRequired prec pProcParmRequiredList + | ProcParmRequiredList COMMA ProcParmRequired + + nonterm ProcParmOptional prec pProcParmOptional ::= + | Annotations TypeSpec ID EQ Expr + | Annotations TypeSpec ID EQ Lval + + | TypeSpec ID EQ Expr + | TypeSpec ID EQ Lval + + | Annotations ID EQ Expr + | Annotations ID EQ Lval + | ID EQ Expr + | ID EQ Lval + | Annotations TypeSpec BLANK EQ Expr + | Annotations TypeSpec BLANK EQ Lval + | TypeSpec BLANK EQ Expr + | TypeSpec BLANK EQ Lval + | Annotations BLANK EQ Expr + | Annotations BLANK EQ Lval + | BLANK EQ Expr + | BLANK EQ Lval + nonterm ProcParmOptionalList ::= + | ProcParmOptional + | ProcParmOptionalList COMMA ProcParmOptional + + nonterm ProcParmPargs ::= + | Annotations LBRACKET RBRACKET GenericParamOne ID + | LBRACKET RBRACKET GenericParamOne ID + | Annotations LBRACKET RBRACKET GenericParamOne BLANK + | LBRACKET RBRACKET GenericParamOne BLANK + + nonterm ProcParmKargs ::= + | Annotations LBRACE COLON RBRACE GenericParamTwo ID + | LBRACE COLON RBRACE GenericParamTwo ID + | Annotations LBRACE COLON RBRACE GenericParamTwo BLANK + | LBRACE COLON RBRACE GenericParamTwo BLANK + + nonterm ProcParmBody ::= + | ProcRetBody + | epsilon prec pProcParmBody + + nonterm ProcRetBody ::= + | ProcParmRequiredList COMMA ProcParmOptionalList COMMA ProcParmPargs COMMA ProcParmKargs + | ProcParmRequiredList COMMA ProcParmOptionalList COMMA ProcParmPargs + | ProcParmRequiredList COMMA ProcParmOptionalList COMMA ProcParmKargs + | ProcParmRequiredList COMMA ProcParmOptionalList + | ProcParmRequiredList COMMA ProcParmPargs COMMA ProcParmKargs + | ProcParmRequiredList COMMA ProcParmPargs + | ProcParmRequiredList COMMA ProcParmKargs + | ProcParmRequiredList + | ProcParmOptionalList COMMA ProcParmPargs COMMA ProcParmKargs + | ProcParmOptionalList COMMA ProcParmPargs + | ProcParmOptionalList COMMA ProcParmKargs + | ProcParmOptionalList + | ProcParmPargs COMMA ProcParmKargs + | ProcParmPargs + | ProcParmKargs + + nonterm Str ::= + | STR + | Str STR + + nonterm Buf ::= + | BUF + | Buf BUF + + nonterm ProcDecl ::= + | Annotations Proc OptionalGenericParamList + | Annotations Proc OptionalGenericParamList RARROW ProcRetBody + | Annotations Proc OptionalGenericParamList RARROW LPAREN RPAREN + | Annotations Proc OptionalGenericParamList RARROW LPAREN ProcRetBody RPAREN + + | Proc OptionalGenericParamList + | Proc OptionalGenericParamList RARROW ProcRetBody + | Proc OptionalGenericParamList RARROW LPAREN RPAREN + | Proc OptionalGenericParamList RARROW LPAREN ProcRetBody RPAREN + + | Annotations Proc ID OptionalGenericParamList + | Annotations Proc ID OptionalGenericParamList RARROW ProcRetBody + | Annotations Proc ID OptionalGenericParamList RARROW LPAREN RPAREN + | Annotations Proc ID OptionalGenericParamList RARROW LPAREN ProcRetBody RPAREN + + | Proc ID OptionalGenericParamList + | Proc ID OptionalGenericParamList RARROW ProcRetBody + | Proc ID OptionalGenericParamList RARROW LPAREN RPAREN + | Proc ID OptionalGenericParamList RARROW LPAREN ProcRetBody RPAREN + + | Annotations Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN + | Annotations Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW ProcRetBody + | Annotations Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN + | Annotations Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN + ProcRetBody RPAREN + + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW ProcRetBody + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN + | Proc OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN ProcRetBody RPAREN + + | Annotations Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN + | Annotations Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW ProcRetBody + | Annotations Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN + | Annotations Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN + ProcRetBody RPAREN + + | Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN + | Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW ProcRetBody + | Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN + | Proc ID OptionalGenericParamList LPAREN ProcParmBody RPAREN RARROW LPAREN ProcRetBody RPAREN + + nonterm ProcDeclStmt ::= + | ProcDecl + + nonterm ProcExpr ::= + | ProcDecl LBRACE Stmts RBRACE + + nonterm GuardVariant ::= + | CONTINUE + | THROW + + nonterm GuardExpr ::= + | Annotations GUARD GuardVariant LBRACE Stmts RBRACE + | Annotations GUARD GuardVariant RARROW ProcRetBody LBRACE Stmts RBRACE + | Annotations GUARD GuardVariant RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | Annotations GUARD GuardVariant RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts RBRACE + + | GUARD GuardVariant LBRACE Stmts RBRACE + | GUARD GuardVariant RARROW ProcRetBody LBRACE Stmts RBRACE + | GUARD GuardVariant RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | GUARD GuardVariant RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts RBRACE + + nonterm CatchExpr ::= + | Annotations CATCH LBRACE Stmts RBRACE + | Annotations CATCH RARROW ProcRetBody LBRACE Stmts RBRACE + | Annotations CATCH RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | Annotations CATCH RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts RBRACE + | CATCH LBRACE Stmts RBRACE + | CATCH RARROW ProcRetBody LBRACE Stmts RBRACE + | CATCH RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | CATCH RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts RBRACE + | Annotations CATCH LPAREN ProcParmBody RPAREN LBRACE Stmts RBRACE + | Annotations CATCH LPAREN ProcParmBody RPAREN RARROW ProcRetBody LBRACE Stmts RBRACE + | Annotations CATCH LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | Annotations CATCH LPAREN ProcParmBody RPAREN RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts + RBRACE + | CATCH LPAREN ProcParmBody RPAREN LBRACE Stmts RBRACE + | CATCH LPAREN ProcParmBody RPAREN RARROW ProcRetBody LBRACE Stmts RBRACE + | CATCH LPAREN ProcParmBody RPAREN RARROW LPAREN RPAREN LBRACE Stmts RBRACE + | CATCH LPAREN ProcParmBody RPAREN RARROW LPAREN ProcRetBody RPAREN LBRACE Stmts RBRACE + + nonterm InitFieldList prec pInitFieldList ::= + | ID EQ Expr + | ID EQ Lval + | InitFieldList COMMA ID EQ Expr + | InitFieldList COMMA ID EQ Lval + + nonterm Inits ::= + | INIT LPAREN RPAREN + | INIT LPAREN CallList RPAREN + | INIT LPAREN Expr RPAREN + | INIT LPAREN Lval RPAREN + | INIT LPAREN ExprListBody RPAREN + | INIT LPAREN LvalListBody RPAREN + | INIT LPAREN RPAREN COMMA InitFieldList + | INIT LPAREN CallList RPAREN COMMA InitFieldList + | INIT LPAREN Expr RPAREN COMMA InitFieldList + | INIT LPAREN Lval RPAREN COMMA InitFieldList + | INIT LPAREN ExprListBody RPAREN COMMA InitFieldList + | INIT LPAREN LvalListBody RPAREN COMMA InitFieldList + | Lval LPAREN RPAREN + | Lval LPAREN CallList RPAREN + | Lval LPAREN Expr RPAREN + | LVAL LPAREN LVAL RPAREN + | Lval LPAREN ExprListBody RPAREN + | Lval LPAREN LvalListBody RPAREN + | Lval LPAREN RPAREN COMMA InitFieldList + | Lval LPAREN CallList RPAREN COMMA InitFieldList + | Lval LPAREN Expr RPAREN COMMA InitFieldList + | LVAL LPAREN LVAL RPAREN COMMA InitFieldList + | Lval LPAREN ExprListBody RPAREN COMMA InitFieldList + | Lval LPAREN LvalListBody RPAREN COMMA InitFieldList + | InitFieldList + + nonterm MemberBlock ::= + | Annotations MEMBER LBRACE Stmts RBRACE + | MEMBER LBRACE Stmts RBRACE + + nonterm InitDecl ::= + | Annotations INIT + | Annotations INIT COLON Inits + | Annotations INIT LPAREN RPAREN + | Annotations INIT LPAREN RPAREN COLON Inits + + | INIT + | INIT COLON Inits + | INIT LPAREN RPAREN + | INIT LPAREN RPAREN COLON Inits + + | Annotations INIT LPAREN ProcParmBody RPAREN + | Annotations INIT LPAREN ProcParmBody RPAREN COLON Inits + + | INIT LPAREN ProcParmBody RPAREN + | INIT LPAREN ProcParmBody RPAREN COLON Inits + + nonterm InitDeclStmt ::= + | InitDecl + + nonterm InitExpr ::= + | InitDecl LBRACE Stmts RBRACE + + nonterm ExtendsClass ::= + | epsilon + | EXTENDS TypeName + + nonterm InterfaceList ::= + | TypeName + | InterfaceList COMMA TypeName + nonterm ImplementsInterface ::= + | epsilon prec pImplementsInterface + | IMPLEMENTS InterfaceList + + nonterm ClassDecl ::= + | Annotations CLASS OptionalGenericParamList ExtendsClass ImplementsInterface + | CLASS OptionalGenericParamList ExtendsClass ImplementsInterface + | Annotations CLASS ID OptionalGenericParamList ExtendsClass ImplementsInterface + | CLASS ID OptionalGenericParamList ExtendsClass ImplementsInterface + + nonterm ClassExpr ::= + | ClassDecl LBRACE Stmts RBRACE + + nonterm ExtendsInterface ::= + | epsilon + | EXTENDS InterfaceList + nonterm InterfaceDecl ::= + | Annotations INTERFACE OptionalGenericParamList ExtendsInterface + | INTERFACE OptionalGenericParamList ExtendsInterface + | Annotations INTERFACE ID OptionalGenericParamList ExtendsInterface + | INTERFACE ID OptionalGenericParamList ExtendsInterface + nonterm InterfaceExpr ::= + | InterfaceDecl LBRACE Stmts RBRACE + + nonterm EnumDecl ::= + | Annotations ENUM ID + | ENUM ID + nonterm Enum ::= + | Annotations ID + | ID + nonterm Enums ::= + | Enum + | Enum EQ INT + | Enums COMMA Enum + | Enums COMMA Enum EQ INT + nonterm EnumExpr ::= + | EnumDecl LBRACE Enums RBRACE + + nonterm ImportModuleRelpath ::= + | epsilon prec pImportModuleRelpath + | POW + | ImportModuleRelpath POW + # Use right recursion here in order to be able to specify that only the last + # component of the moduleName defines a lexical variable. + nonterm ImportModuleName ::= + | ID DOT ImportModuleName + | ID prec pImportModuleName + nonterm ImportModule ::= + | ImportModuleRelpath DOT ImportModuleName + | ImportModuleName + + nonterm ImportListItem ::= + | ID OptionalGenericParamList + + nonterm ImportList ::= + | ImportListItem + | ImportList COMMA ImportListItem + nonterm ImportVars ::= + | MUL + | ImportList prec pImportVars + | LPAREN ImportList RPAREN + + nonterm ImportLval ::= + | Annotations IMPORT ImportModule + | IMPORT ImportModule + | Annotations FROM ImportModule IMPORT ImportVars prec pImportLval + | FROM ImportModule IMPORT ImportVars prec pImportLval + + nonterm CallNamed prec pCallNamed ::= + | ID COLON Expr + | ID COLON Lval + nonterm CallNamedList ::= + | CallNamed + | CallNamedList COMMA CallNamed + nonterm CallPargs prec pCallPargs ::= + | COLON Expr + | COLON Lval + nonterm CallKargs ::= + | COLON COLON Expr + | COLON COLON Lval + nonterm CallList ::= + # 1111 + | Expr COMMA CallNamedList COMMA CallPargs COMMA CallKargs + | Lval COMMA CallNamedList COMMA CallPargs COMMA CallKargs + | ExprListBody COMMA CallNamedList COMMA CallPargs COMMA CallKargs + | LvalListBody COMMA CallNamedList COMMA CallPargs COMMA CallKargs + # 1110 + | Expr COMMA CallNamedList COMMA CallPargs + | Lval COMMA CallNamedList COMMA CallPargs + | ExprListBody COMMA CallNamedList COMMA CallPargs + | LvalListBody COMMA CallNamedList COMMA CallPargs + # 1101 + | Expr COMMA CallNamedList COMMA CallKargs + | Lval COMMA CallNamedList COMMA CallKargs + | ExprListBody COMMA CallNamedList COMMA CallKargs + | LvalListBody COMMA CallNamedList COMMA CallKargs + # 1100 + | Expr COMMA CallNamedList + | Lval COMMA CallNamedList + | ExprListBody COMMA CallNamedList + | LvalListBody COMMA CallNamedList + # 1011 + | Expr COMMA CallPargs COMMA CallKargs + | Lval COMMA CallPargs COMMA CallKargs + | ExprListBody COMMA CallPargs COMMA CallKargs + | LvalListBody COMMA CallPargs COMMA CallKargs + # 1010 + | Expr COMMA CallPargs + | Lval COMMA CallPargs + | ExprListBody COMMA CallPargs + | LvalListBody COMMA CallPargs + # 1001 + | Expr COMMA CallKargs + | Lval COMMA CallKargs + | ExprListBody COMMA CallKargs + | LvalListBody COMMA CallKargs + # 1000 + # Everywhere that CallList is used, there must be four companion productions with Expr, Lval, + # ExprListBody, and LvalListBody substituted, instead of the following productions. If the + # following productions were enabled, there would be no consistent way to resolve the + # resulting conflicts. + # + # | Expr + # | Lval + # | ExprListBody + # | LvalListBody + # 0111 + | CallNamedList COMMA CallPargs COMMA CallKargs + # 0110 + | CallNamedList COMMA CallPargs + # 0101 + | CallNamedList COMMA CallKargs + # 0100 + | CallNamedList + # 0011 + | CallPargs COMMA CallKargs + # 0010 + | CallPargs + # 0001 + | CallKargs + nonterm CallThis prec pCallThis ::= + | THIS COLON Expr + | THIS COLON Lval + nonterm CallExpr ::= + | Expr LPAREN RPAREN + | Expr LPAREN CallThis RPAREN + | Expr LPAREN CallList RPAREN + | Expr LPAREN Expr RPAREN + | Expr LPAREN Lval RPAREN + | Expr LPAREN ExprListBody RPAREN + | Expr LPAREN LvalListBody RPAREN + | Expr LPAREN CallThis COMMA CallList RPAREN + | Expr LPAREN CallThis COMMA Expr RPAREN + | Expr LPAREN CallThis COMMA Lval RPAREN + | Expr LPAREN CallThis COMMA ExprListBody RPAREN + | Expr LPAREN CallThis COMMA LvalListBody RPAREN + + | Lval LPAREN RPAREN + | Lval LPAREN CallThis RPAREN + | Lval LPAREN CallList RPAREN + | Lval LPAREN Expr RPAREN + | Lval LPAREN Lval RPAREN + | Lval LPAREN ExprListBody RPAREN + | Lval LPAREN LvalListBody RPAREN + | Lval LPAREN CallThis COMMA CallList RPAREN + | Lval LPAREN CallThis COMMA Expr RPAREN + | Lval LPAREN CallThis COMMA Lval RPAREN + | Lval LPAREN CallThis COMMA ExprListBody RPAREN + | Lval LPAREN CallThis COMMA LvalListBody RPAREN + + nonterm UnaryExpr ::= + | NOT Expr prec pUnaryExpr1 + | NOT Lval prec pUnaryExpr1 + | PLUS Expr prec pUnaryExpr2 + | PLUS Lval prec pUnaryExpr2 + | MINUS Expr prec pUnaryExpr2 + | MINUS Lval prec pUnaryExpr2 + + nonterm PowOp prec pPowOp ::= + | POW + nonterm MulOp prec pMulOp ::= + | MUL + | DIV + | MOD + nonterm PlusOp prec pPlusOp ::= + | PLUS + | MINUS + + nonterm CmpOp1 prec pCmpOp1 ::= + | LT + | LE + | GE + | GT + nonterm CmpOp2 prec pCmpOp2 ::= + | EQEQ + | NOTEQ + + | EQEQEQ + | NOTEQEQ + nonterm CmpOp3 prec pCmpOp3 ::= + | AND + nonterm CmpOp4 prec pCmpOp4 ::= + | XOR + nonterm CmpOp5 prec pCmpOp5 ::= + | OR + + nonterm InExpr prec pCmpOp2 ::= + # in. + | Expr IN Expr + | Expr IN Lval + | Lval IN Expr + | Lval IN Lval + | Expr IN ExprList + | Lval IN ExprList + | ExprList IN Expr + | ExprList IN Lval + | ExprList IN ExprList + + # not in. + | Expr NOT IN Expr + | Expr NOT IN Lval + | Lval NOT IN Expr + | Lval NOT IN Lval + | Expr NOT IN ExprList + | Lval NOT IN ExprList + | ExprList NOT IN Expr + | ExprList NOT IN Lval + | ExprList NOT IN ExprList + + nonterm InfixExpr ::= + # PowOp. + | Expr PowOp Expr prec pPowOp + | Expr PowOp Lval prec pPowOp + | Expr PowOp LPAREN RPAREN prec pPowOp + | Expr PowOp LPAREN CallList RPAREN prec pPowOp + # | Expr PowOp LPAREN Expr RPAREN prec pPowOp + # | Expr PowOp LPAREN Lval RPAREN prec pPowOp + | Expr PowOp LPAREN ExprListBody RPAREN prec pPowOp + | Expr PowOp LPAREN LvalListBody RPAREN prec pPowOp + + | Lval PowOp Expr prec pPowOp + | Lval PowOp Lval prec pPowOp + | Lval PowOp LPAREN RPAREN prec pPowOp + | Lval PowOp LPAREN CallList RPAREN prec pPowOp + # | Lval PowOp LPAREN Expr RPAREN prec pPowOp + # | Lval PowOp LPAREN Lval RPAREN prec pPowOp + | Lval PowOp LPAREN ExprListBody RPAREN prec pPowOp + | Lval PowOp LPAREN LvalListBody RPAREN prec pPowOp + + # MulOp. + | Expr MulOp Expr prec pMulOp + | Expr MulOp Lval prec pMulOp + | Expr MulOp LPAREN RPAREN prec pMulOp + | Expr MulOp LPAREN CallList RPAREN prec pMulOp + # | Expr MulOp LPAREN Expr RPAREN prec pMulOp + # | Expr MulOp LPAREN Lval RPAREN prec pMulOp + | Expr MulOp LPAREN ExprListBody RPAREN prec pMulOp + | Expr MulOp LPAREN LvalListBody RPAREN prec pMulOp + + | Lval MulOp Expr prec pMulOp + | Lval MulOp Lval prec pMulOp + | Lval MulOp LPAREN RPAREN prec pMulOp + | Lval MulOp LPAREN CallList RPAREN prec pMulOp + # | Lval MulOp LPAREN Expr RPAREN prec pMulOp + # | Lval MulOp LPAREN Lval RPAREN prec pMulOp + | Lval MulOp LPAREN ExprListBody RPAREN prec pMulOp + | Lval MulOp LPAREN LvalListBody RPAREN prec pMulOp + + # PlusOp. + | Expr PlusOp Expr prec pPlusOp + | Expr PlusOp Lval prec pPlusOp + | Expr PlusOp LPAREN RPAREN prec pPlusOp + | Expr PlusOp LPAREN CallList RPAREN prec pPlusOp + # | Expr PlusOp LPAREN Expr RPAREN prec pPlusOp + # | Expr PlusOp LPAREN Lval RPAREN prec pPlusOp + | Expr PlusOp LPAREN ExprListBody RPAREN prec pPlusOp + | Expr PlusOp LPAREN LvalListBody RPAREN prec pPlusOp + + | Lval PlusOp Expr prec pPlusOp + | Lval PlusOp Lval prec pPlusOp + | Lval PlusOp LPAREN RPAREN prec pPlusOp + | Lval PlusOp LPAREN CallList RPAREN prec pPlusOp + # | Lval PlusOp LPAREN Expr RPAREN prec pPlusOp + # | Lval PlusOp LPAREN Lval RPAREN prec pPlusOp + | Lval PlusOp LPAREN ExprListBody RPAREN prec pPlusOp + | Lval PlusOp LPAREN LvalListBody RPAREN prec pPlusOp + + # CmpOp1. + | Expr CmpOp1 Expr prec pCmpOp1 + | Expr CmpOp1 Lval prec pCmpOp1 + | Lval CmpOp1 Expr prec pCmpOp1 + | Lval CmpOp1 Lval prec pCmpOp1 + | Expr CmpOp1 ExprList prec pCmpOp1 + | Lval CmpOp1 ExprList prec pCmpOp1 + | ExprList CmpOp1 Expr prec pCmpOp1 + | ExprList CmpOp1 Lval prec pCmpOp1 + | ExprList CmpOp1 ExprList prec pCmpOp1 + + # InExpr. + | InExpr + + # CmpOp2. + | Expr CmpOp2 Expr prec pCmpOp2 + | Expr CmpOp2 Lval prec pCmpOp2 + | Lval CmpOp2 Expr prec pCmpOp2 + | Lval CmpOp2 Lval prec pCmpOp2 + | Expr CmpOp2 ExprList prec pCmpOp2 + | Lval CmpOp2 ExprList prec pCmpOp2 + | ExprList CmpOp2 Expr prec pCmpOp2 + | ExprList CmpOp2 Lval prec pCmpOp2 + | ExprList CmpOp2 ExprList prec pCmpOp2 + + # CmpOp3. + | Expr CmpOp3 Expr prec pCmpOp3 + | Expr CmpOp3 Lval prec pCmpOp3 + | Lval CmpOp3 Expr prec pCmpOp3 + | Lval CmpOp3 Lval prec pCmpOp3 + + # CmpOp4. + | Expr CmpOp4 Expr prec pCmpOp4 + | Expr CmpOp4 Lval prec pCmpOp4 + | Lval CmpOp4 Expr prec pCmpOp4 + | Lval CmpOp4 Lval prec pCmpOp4 + + # CmpOp5. + | Expr CmpOp5 Expr prec pCmpOp5 + | Expr CmpOp5 Lval prec pCmpOp5 + | Lval CmpOp5 Expr prec pCmpOp5 + | Lval CmpOp5 Lval prec pCmpOp5 + + nonterm AssnExprLeft ::= + | LPAREN AssnExprLeft RPAREN + + | Annotations Var COMMA VarRestId + | Var COMMA VarRestId + | Lval COMMA VarRestId prec pAssnExprLeft4 + | LvalListBody COMMA VarRestId prec pAssnExprLeft4 + + | Annotations Var COMMA LBRACKET RBRACKET GenericParamOne COLON Lval prec pAssnExprLeft2 + | Var COMMA LBRACKET RBRACKET GenericParamOne COLON Lval prec pAssnExprLeft2 + | Lval COMMA LBRACKET RBRACKET GenericParamOne COLON Lval prec pAssnExprLeft2 + | LvalListBody COMMA LBRACKET RBRACKET GenericParamOne COLON Lval prec pAssnExprLeft2 + + | VarRestId prec pAssnExprLeft1 + | LBRACKET RBRACKET GenericParamOne COLON Lval prec pAssnExprLeft1 + + | Annotations Var prec pAssnExprLeft5 + | Var prec pAssnExprLeft5 + # Everywhere AssnExprLeft is used, there must be a companion production with Lval substituted, + # instead of the following production. + # + # | Lval + | LvalListBody prec pAssnExprLeft3 + + nonterm Var ::= + | Annotations VAR ID prec pVar1 + | VAR ID prec pVar2 + | Annotations TypeSpec ID prec pVar1 + | TypeSpec ID prec pVar2 + + nonterm VarRestId ::= + | Annotations LBRACKET RBRACKET GenericParamOne ID + | LBRACKET RBRACKET GenericParamOne ID prec pVarRestId + | Annotations LBRACKET RBRACKET GenericParamOne BLANK + | LBRACKET RBRACKET GenericParamOne BLANK prec pVarRestId + + nonterm AttrVar ::= + | Annotations ID + | ID + + nonterm DelimitedAttrExpr ::= + | Annotations ATTR AttrVar LBRACE Stmts RBRACE + | ATTR AttrVar LBRACE Stmts RBRACE + + # Assignment operators. These can only be used with one left side Lval. + nonterm AssnOp ::= + | PLUSEQ + | MINUSEQ + | MULEQ + | DIVEQ + | MODEQ + | POWEQ + + nonterm AssnExpr ::= + | Lval AssnOp Expr prec pAssnExpr1 + | Lval AssnOp Lval prec pAssnExpr1 + | Lval AssnOp LPAREN RPAREN + | Lval AssnOp LPAREN CallList RPAREN + # | Lval AssnOp LPAREN Expr RPAREN + # | Lval AssnOp LPAREN Lval RPAREN + | Lval AssnOp LPAREN ExprListBody RPAREN prec pAssnExpr3 + | Lval AssnOp LPAREN LvalListBody RPAREN + + | Lval EQ Expr prec pAssnExpr2 + | Lval EQ Lval prec pAssnExpr2 + | Lval EQ ExprList prec pAssnExpr2 + + | AssnExprLeft EQ Expr prec pAssnExpr2 + | AssnExprLeft EQ Lval prec pAssnExpr2 + # | Lval EQ Expr prec pAssnExpr2 + | AssnExprLeft EQ ExprList prec pAssnExpr2 + # | Lval EQ ExprList prec pAssnExpr2 + + nonterm ForClause ::= + | FOR AssnExprLeft IN Expr + | FOR AssnExprLeft IN Lval + | FOR Lval IN Expr + | FOR LVAL IN Lval + | FOR AssnExprLeft IN ExprList + | FOR Lval IN ExprList + nonterm ForClauseList ::= + | ForClause + | ForClauseList ForClause + nonterm IfClause ::= + | IF Expr + | IF Lval + nonterm IfClauseList ::= + | IfClause + | IfClauseList IfClause + nonterm DictComprehensionExpr ::= + | LBRACE IN ForClauseList SELECT DictList RBRACE GenericParamTwo + | LBRACE IN ForClauseList IfClauseList SELECT DictList RBRACE GenericParamTwo + nonterm ListComprehensionExpr ::= + | LBRACKET IN ForClauseList SELECT Expr RBRACKET GenericParamOne + | LBRACKET IN ForClauseList SELECT Lval RBRACKET GenericParamOne + | LBRACKET IN ForClauseList SELECT ExprList RBRACKET GenericParamOne + + | LBRACKET IN ForClauseList IfClauseList SELECT Expr RBRACKET GenericParamOne + | LBRACKET IN ForClauseList IfClauseList SELECT Lval RBRACKET GenericParamOne + | LBRACKET IN ForClauseList IfClauseList SELECT ExprList RBRACKET GenericParamOne + + nonterm ElifClause ::= + | ELIF Expr LBRACE Stmts RBRACE + | ELIF Lval LBRACE Stmts RBRACE + | ELIF ExprList LBRACE Stmts RBRACE + nonterm ElifList ::= + | ElifList ElifClause + nonterm ElseClause ::= + | ELSE LBRACE Stmts RBRACE + nonterm IfExpr ::= + | IF Expr LBRACE Stmts RBRACE ElifList ElseClause + | IF Lval LBRACE Stmts RBRACE ElifList ElseClause + | IF ExprList LBRACE Stmts RBRACE ElifList ElseClause + + nonterm IsInListElm ::= + | NOT IN Expr LBRACE Stmts RBRACE + | NOT IN Lval LBRACE Stmts RBRACE + | NOT IN ExprList LBRACE Stmts RBRACE + + | IN Expr LBRACE Stmts RBRACE + | IN Lval LBRACE Stmts RBRACE + | IN ExprList LBRACE Stmts RBRACE + nonterm IsInList ::= + | IsInList IsInListElm + + nonterm IsExpr ::= + | IS InExpr LBRACE Stmts RBRACE IsInList ElseClause + + nonterm DoExpr ::= + | LBRACE Stmts RBRACE + + nonterm ForExpr ::= + | ForClause LBRACE Stmts RBRACE + + nonterm DoWhileExpr ::= + | DO LBRACE Stmts RBRACE WHILE Expr prec pDoWhileExpr1 + | DO LBRACE Stmts RBRACE WHILE Lval prec pDoWhileExpr1 + | DO LBRACE Stmts RBRACE WHILE ExprList prec pDoWhileExpr2 + + nonterm WhileExpr ::= + | WHILE Expr LBRACE Stmts RBRACE + | WHILE Lval LBRACE Stmts RBRACE + | WHILE ExprList LBRACE Stmts RBRACE + + nonterm AssertExpr prec pAssertExpr ::= + | ASSERT Expr + | ASSERT Lval + | ASSERT ExprList + + nonterm DelimitedExpr ::= + | IfExpr + | IsExpr + | DoExpr + | WhileExpr + | ClassExpr + | InterfaceExpr + | EnumExpr + | DelimitedAttrExpr + | ProcExpr + | GuardExpr + | CatchExpr + | InitExpr + | MemberBlock + | ForExpr + + nonterm NondelimitedExpr ::= + | Str prec pNondelimitedExpr1 + | Buf + | FALSE + | TRUE + | NULL + | INT + | FLOAT + | INF + | NAN + | FILE + | LINE + | DEBUG + + | LPAREN Expr RPAREN + # Everywhere that NondelimitedExpr is used, there must be a companion production with Lval + # substituted. + # + # | Lval + | Dict + | List + | CallExpr + | UnaryExpr + | InfixExpr + | AssnExpr + | AssnExprLeft prec pNondelimitedExpr2 + | DictComprehensionExpr + | ListComprehensionExpr + | DoWhileExpr + | AssertExpr + + nonterm ExprSlice prec pExprSlice ::= + | LBRACKET Slice RBRACKET + | LBRACKET RBRACKET + + nonterm ExprSuffix ::= + | ID ExprSlice + + | TypeName DOT ID prec pExprSuffix + + | LvalSuffix DOT ID prec pExprSuffix + | LvalSuffix DOT ID ExprSlice + + | ExprSuffix DOT ID prec pExprSuffix + | ExprSuffix DOT ID ExprSlice + + nonterm Expr ::= + | DelimitedExpr prec pExpr1 + | NondelimitedExpr + # Every use of Expr must be accompanied by a companion Lval production + # + # | Lval + + | Expr ExprSlice + | Lval ExprSlice + + | Expr DOT TypeNameSuffix prec pExpr2 + | Lval DOT TypeNameSuffix prec pExpr2 + + | Expr DOT LvalSuffix prec pExpr2 + | Lval DOT LvalSuffix prec pExpr2 + + | Expr DOT ExprSuffix prec pExpr2 + | Lval DOT ExprSuffix prec pExpr2 + + nonterm ExprListBody prec pExprListBodyA ::= + | Expr COMMA Expr + | Expr COMMA Lval prec pExprListBodyB + | Lval COMMA Expr + | ExprListBody COMMA Expr + | ExprListBody COMMA Lval + | LvalListBody COMMA Expr + + nonterm ExprList ::= + | LPAREN ExprListBody RPAREN prec pExprList2 + | ExprListBody prec pExprList1 + + nonterm ModuleStmt ::= + | Annotations MODULE + | MODULE + + nonterm ReturnStmt ::= + | RETURN + | RETURN LPAREN RPAREN + | RETURN CallList + | RETURN Expr + | RETURN Lval + | RETURN ExprListBody + | RETURN LvalListBody prec pReturnStmt1 + | RETURN LPAREN CallList RPAREN + # | RETURN LPAREN Expr RPAREN + # | RETURN LPAREN Lval RPAREN + | RETURN LPAREN ExprListBody RPAREN + | RETURN LPAREN LvalListBody RPAREN + + nonterm BreakStmt ::= + | BREAK INT + | BREAK + + nonterm ContinueStmt ::= + | CONTINUE INT + | CONTINUE + + nonterm ThrowStmt ::= + | THROW + | THROW LPAREN RPAREN + | THROW CallList + | THROW Expr + | THROW Lval + | THROW ExprListBody + | THROW LvalListBody prec pThrowStmt1 + | THROW LPAREN CallList RPAREN + # | THROW LPAREN Expr RPAREN + # | THROW LPAREN Lval RPAREN + | THROW LPAREN ExprListBody RPAREN + | THROW LPAREN LvalListBody RPAREN + + nonterm CblockStmt ::= + | Annotations CBLOCK + | CBLOCK + + nonterm Stmt ::= + | ModuleStmt + | ClassDecl + | InterfaceDecl + | EnumDecl + | InitDeclStmt + | ProcDeclStmt + | ReturnStmt + | BreakStmt + | ContinueStmt + | ThrowStmt + + | NondelimitedExpr + | Lval + | ExprListprec prec pStmt + + nonterm DelimitedStmt ::= + | CblockStmt + + nonterm StmtList ::= + | Stmt SEMICOLON + | DelimitedStmt + | DelimitedExpr prec pStmtList + + | StmtList Stmt SEMICOLON + | StmtList DelimitedStmt + | StmtList DelimitedExpr prec pStmtList + | StmtList SEMICOLON + + nonterm Stmts ::= + | Stmt + | StmtList + + start Module ::= + | BOI Stmts diff --git a/bootstrap/test/hocc/M.expected b/bootstrap/test/hocc/M.expected new file mode 100644 index 000000000..ff304d7da --- /dev/null +++ b/bootstrap/test/hocc/M.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./M.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/M.hmh b/bootstrap/test/hocc/M.hmh new file mode 100644 index 000000000..267232cdd --- /dev/null +++ b/bootstrap/test/hocc/M.hmh @@ -0,0 +1,30 @@ +# Example grammar G2 from Pager(1977), pp 256. + +hocc + token A + token B + token C + token D + token E + token T + token U + + start X ::= + | A Y D + | A Z C + | A T + | B Y E + | B Z D + | B T + + nonterm Y ::= + | T W + | U X + + nonterm Z ::= T U + + nonterm T ::= U X A + + nonterm W ::= U V + + nonterm V ::= epsilon diff --git a/bootstrap/test/hocc/N.expected b/bootstrap/test/hocc/N.expected new file mode 100644 index 000000000..34cbf3362 --- /dev/null +++ b/bootstrap/test/hocc/N.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./N.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/N.hmh b/bootstrap/test/hocc/N.hmh new file mode 100644 index 000000000..9c99825c3 --- /dev/null +++ b/bootstrap/test/hocc/N.hmh @@ -0,0 +1,23 @@ +# Example grammar G3 from Pager(1977), pp 259. Weak compatibility fails to combine states that would +# be combined if strong compatibility were used. + +hocc + token A + token B + token C + token D + token E + token T + token U + token V + token W + + start X ::= + | A Y D + | A Z C + | B Y E + | B Z D + + nonterm Y ::= T U V + + nonterm Z ::= T U W diff --git a/bootstrap/test/hocc/Parse_a.expected b/bootstrap/test/hocc/Parse_a.expected new file mode 100644 index 000000000..5d3e138a5 --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.expected @@ -0,0 +1,3 @@ +hocc: Parsing "./Parse_a.hmhi" +hocc: Parsing "./Parse_a.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/Parse_a.hmh b/bootstrap/test/hocc/Parse_a.hmh new file mode 100644 index 000000000..205b935ac --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.hmh @@ -0,0 +1,53 @@ +# Matter. + +include hocc + prec p1 + left p2 + right p3 + prec p4 < p1 + left p5 < p1, p2 + right p6 < p3, p4, p5 + + token OP + token PLUS "+" + token UNS of Uns.t + token MINUS prec add + token STAR "*" prec mul + token SLASH "/" of Unit.t prec mul + + nonterm N1 of Unit.t ::= epsilon -> + (a b) + (|a b|) + [a b] + [|a b|] + {a b} + (a + (|a + [a [|a {a b} b|] b] + b|) + b + ) + a; b; c + + nonterm N2 ::= N1 N2 | epsilon + + start N3 prec p1 ::= epsilon + + nonterm N4 of Unit.t ::= epsilon -> () + + start N5 of Unit.t prec p2 ::= epsilon -> () + + nonterm N6 of Unit.t ::= + | N2 + | OP N3 -> () + | OP N4 -> () + | N4 N5 + | epsilon -> () + + nonterm N7 of Unit.t ::= + | o:OP _:N1 N2 -> foo + + Code = { + } + +# Matter. diff --git a/bootstrap/test/hocc/Parse_a.hmhi b/bootstrap/test/hocc/Parse_a.hmhi new file mode 100644 index 000000000..d1b811bd3 --- /dev/null +++ b/bootstrap/test/hocc/Parse_a.hmhi @@ -0,0 +1,5 @@ +# Matter. + +include hocc + +# Matter. diff --git a/bootstrap/test/hocc/Parse_b.expected b/bootstrap/test/hocc/Parse_b.expected new file mode 100644 index 000000000..5a6106449 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.expected @@ -0,0 +1,3 @@ +hocc: Parsing "./Parse_b.hmhi" +hocc: Parsing "./Parse_b.hmh" +hocc: Generating specification diff --git a/bootstrap/test/hocc/Parse_b.hmh b/bootstrap/test/hocc/Parse_b.hmh new file mode 100644 index 000000000..da321f015 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.hmh @@ -0,0 +1,2 @@ +hocc + prec p \ No newline at end of file diff --git a/bootstrap/test/hocc/Parse_b.hmhi b/bootstrap/test/hocc/Parse_b.hmhi new file mode 100644 index 000000000..efa806861 --- /dev/null +++ b/bootstrap/test/hocc/Parse_b.hmhi @@ -0,0 +1 @@ +hocc \ No newline at end of file diff --git a/bootstrap/test/hocc/Parse_error_cident.expected b/bootstrap/test/hocc/Parse_error_cident.expected new file mode 100644 index 000000000..7091a0d9e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_cident.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_cident.hmh" +hocc: At ["./Parse_error_cident.hmh":2:10.."./Parse_error_cident.hmh":2:16): Expected cident diff --git a/bootstrap/test/hocc/Parse_error_cident.hmh b/bootstrap/test/hocc/Parse_error_cident.hmh new file mode 100644 index 000000000..d37247175 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_cident.hmh @@ -0,0 +1,2 @@ +hocc + token uident diff --git a/bootstrap/test/hocc/Parse_error_code.expected b/bootstrap/test/hocc/Parse_error_code.expected new file mode 100644 index 000000000..c0ce9ea88 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_code.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_code.hmh" +hocc: At ["./Parse_error_code.hmh":3:0.."./Parse_error_code.hmh":3:0): Expected Hemlock code diff --git a/bootstrap/test/hocc/Parse_error_code.hmh b/bootstrap/test/hocc/Parse_error_code.hmh new file mode 100644 index 000000000..06403228a --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_code.hmh @@ -0,0 +1,2 @@ +hocc + nonterm N of T.t ::= epsilon -> diff --git a/bootstrap/test/hocc/Parse_error_delimited_rarray.expected b/bootstrap/test/hocc/Parse_error_delimited_rarray.expected new file mode 100644 index 000000000..f28b3708e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rarray.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rarray.hmh" +hocc: At ["./Parse_error_delimited_rarray.hmh":4:0.."./Parse_error_delimited_rarray.hmh":4:0): Expected '|]' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh b/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh new file mode 100644 index 000000000..39825e737 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rarray.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> [| diff --git a/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected b/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected new file mode 100644 index 000000000..a84be3ab7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rbrack.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rbrack.hmh" +hocc: At ["./Parse_error_delimited_rbrack.hmh":4:0.."./Parse_error_delimited_rbrack.hmh":4:0): Expected ']' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh b/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh new file mode 100644 index 000000000..1b31cc9c5 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rbrack.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> [ diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected b/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected new file mode 100644 index 000000000..fd3b5acd7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcapture.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rcapture.hmh" +hocc: At ["./Parse_error_delimited_rcapture.hmh":4:0.."./Parse_error_delimited_rcapture.hmh":4:0): Expected '|)' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh b/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh new file mode 100644 index 000000000..4b44ecdd9 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcapture.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> (| diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected b/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected new file mode 100644 index 000000000..2b32e6e92 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcurly.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rcurly.hmh" +hocc: At ["./Parse_error_delimited_rcurly.hmh":4:0.."./Parse_error_delimited_rcurly.hmh":4:0): Expected '}' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh b/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh new file mode 100644 index 000000000..cdea67ea8 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rcurly.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> { diff --git a/bootstrap/test/hocc/Parse_error_delimited_rparen.expected b/bootstrap/test/hocc/Parse_error_delimited_rparen.expected new file mode 100644 index 000000000..92ff73f2b --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rparen.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_delimited_rparen.hmh" +hocc: At ["./Parse_error_delimited_rparen.hmh":4:0.."./Parse_error_delimited_rparen.hmh":4:0): Expected ')' diff --git a/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh b/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh new file mode 100644 index 000000000..608c38ab2 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_delimited_rparen.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon -> ( diff --git a/bootstrap/test/hocc/Parse_error_hmhi.expected b/bootstrap/test/hocc/Parse_error_hmhi.expected new file mode 100644 index 000000000..6b736744e --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_hmhi.hmhi" +hocc: At ["./Parse_error_hmhi.hmhi":1:3.."./Parse_error_hmhi.hmhi":1:3): Expected 'hocc' keyword diff --git a/bootstrap/test/hocc/Parse_error_hmhi.hmh b/bootstrap/test/hocc/Parse_error_hmhi.hmh new file mode 100644 index 000000000..21baf0dab --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.hmh @@ -0,0 +1,2 @@ +hocc + prec p diff --git a/bootstrap/test/hocc/Parse_error_hmhi.hmhi b/bootstrap/test/hocc/Parse_error_hmhi.hmhi new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hmhi.hmhi @@ -0,0 +1 @@ +foo diff --git a/bootstrap/test/hocc/Parse_error_hocc.expected b/bootstrap/test/hocc/Parse_error_hocc.expected new file mode 100644 index 000000000..c8344a1c3 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hocc.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_hocc.hmh" +hocc: At ["./Parse_error_hocc.hmh":2:0.."./Parse_error_hocc.hmh":2:0): Expected 'hocc' statement diff --git a/bootstrap/test/hocc/Parse_error_hocc.hmh b/bootstrap/test/hocc/Parse_error_hocc.hmh new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_hocc.hmh @@ -0,0 +1 @@ +foo diff --git a/bootstrap/test/hocc/Parse_error_nonterm_cce.expected b/bootstrap/test/hocc/Parse_error_nonterm_cce.expected new file mode 100644 index 000000000..4a20d8840 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_nonterm_cce.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_nonterm_cce.hmh" +hocc: At ["./Parse_error_nonterm_cce.hmh":3:0.."./Parse_error_nonterm_cce.hmh":3:0): Expected '::=' diff --git a/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh b/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh new file mode 100644 index 000000000..ad9acc84b --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_nonterm_cce.hmh @@ -0,0 +1,2 @@ +hocc + nonterm N of T.t diff --git a/bootstrap/test/hocc/Parse_error_of_type_dot.expected b/bootstrap/test/hocc/Parse_error_of_type_dot.expected new file mode 100644 index 000000000..3f4011d31 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_of_type_dot.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_of_type_dot.hmh" +hocc: At ["./Parse_error_of_type_dot.hmh":3:0.."./Parse_error_of_type_dot.hmh":3:0): Expected '.' diff --git a/bootstrap/test/hocc/Parse_error_of_type_dot.hmh b/bootstrap/test/hocc/Parse_error_of_type_dot.hmh new file mode 100644 index 000000000..abec735b7 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_of_type_dot.hmh @@ -0,0 +1,2 @@ +hocc + token A of T diff --git a/bootstrap/test/hocc/Parse_error_precs.expected b/bootstrap/test/hocc/Parse_error_precs.expected new file mode 100644 index 000000000..2fbe19a6f --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_precs.hmh" +hocc: At ["./Parse_error_precs.hmh":3:0.."./Parse_error_precs.hmh":3:0): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_precs.hmh b/bootstrap/test/hocc/Parse_error_precs.hmh new file mode 100644 index 000000000..8fccb921b --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs.hmh @@ -0,0 +1,2 @@ +hocc + prec p < a, diff --git a/bootstrap/test/hocc/Parse_error_precs_lt.expected b/bootstrap/test/hocc/Parse_error_precs_lt.expected new file mode 100644 index 000000000..eac077b2d --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs_lt.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_precs_lt.hmh" +hocc: At ["./Parse_error_precs_lt.hmh":3:0.."./Parse_error_precs_lt.hmh":3:0): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_precs_lt.hmh b/bootstrap/test/hocc/Parse_error_precs_lt.hmh new file mode 100644 index 000000000..a409a574a --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_precs_lt.hmh @@ -0,0 +1,2 @@ +hocc + prec p < diff --git a/bootstrap/test/hocc/Parse_error_prod_param_type.expected b/bootstrap/test/hocc/Parse_error_prod_param_type.expected new file mode 100644 index 000000000..da73438b1 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_prod_param_type.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_prod_param_type.hmh" +hocc: At ["./Parse_error_prod_param_type.hmh":3:10.."./Parse_error_prod_param_type.hmh":3:12): Expected production parameter type diff --git a/bootstrap/test/hocc/Parse_error_prod_param_type.hmh b/bootstrap/test/hocc/Parse_error_prod_param_type.hmh new file mode 100644 index 000000000..8812a56f6 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_prod_param_type.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | x:42 -> () diff --git a/bootstrap/test/hocc/Parse_error_reduction_arrow.expected b/bootstrap/test/hocc/Parse_error_reduction_arrow.expected new file mode 100644 index 000000000..f73d11882 --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_reduction_arrow.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_reduction_arrow.hmh" +hocc: At ["./Parse_error_reduction_arrow.hmh":4:0.."./Parse_error_reduction_arrow.hmh":4:0): Expected '->' diff --git a/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh b/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh new file mode 100644 index 000000000..d6884cbdd --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_reduction_arrow.hmh @@ -0,0 +1,3 @@ +hocc + nonterm N of T.t ::= + | epsilon diff --git a/bootstrap/test/hocc/Parse_error_uident.expected b/bootstrap/test/hocc/Parse_error_uident.expected new file mode 100644 index 000000000..513b9a32a --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_uident.expected @@ -0,0 +1,2 @@ +hocc: Parsing "./Parse_error_uident.hmh" +hocc: At ["./Parse_error_uident.hmh":2:9.."./Parse_error_uident.hmh":2:15): Expected uident diff --git a/bootstrap/test/hocc/Parse_error_uident.hmh b/bootstrap/test/hocc/Parse_error_uident.hmh new file mode 100644 index 000000000..8818d5afb --- /dev/null +++ b/bootstrap/test/hocc/Parse_error_uident.hmh @@ -0,0 +1,2 @@ +hocc + prec Cident diff --git a/bootstrap/test/hocc/dune b/bootstrap/test/hocc/dune new file mode 100644 index 000000000..bc6ae7e9e --- /dev/null +++ b/bootstrap/test/hocc/dune @@ -0,0 +1,369 @@ +(rule + (deps + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to help_a.out (run %{bin:hocc} -v -verbose -txt -text -html -hmh -hocc -c -canonical -hm -hemlock -ml -ocaml -s Foo -src Foo -d bar -dstdir bar -h))))) +(rule + (alias runtest) + (action (diff help_a.expected help_a.out))) + +(rule + (deps + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to help_b.out (run %{bin:hocc} -v -verbose -txt -text -html -hmh -hocc -c -canonical -hm -hemlock -ml -ocaml -s Foo -src Foo -d bar -dstdir bar -no-such-option))))) +(rule + (alias runtest) + (action (diff help_b.expected help_b.out))) + +(rule + (deps + (glob_files Parse_a.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_a.out (run %{bin:hocc} -v -s Parse_a))))) +(rule + (alias runtest) + (action (diff Parse_a.expected Parse_a.out))) + +(rule + (deps + (glob_files Parse_b.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_b.out (run %{bin:hocc} -v -s Parse_b))))) +(rule + (alias runtest) + (action (diff Parse_b.expected Parse_b.out))) + +(rule + (deps + (glob_files Parse_error_uident.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_uident.out (run %{bin:hocc} -v -s Parse_error_uident))))) +(rule + (alias runtest) + (action (diff Parse_error_uident.expected Parse_error_uident.out))) + +(rule + (deps + (glob_files Parse_error_cident.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_cident.out (run %{bin:hocc} -v -s Parse_error_cident))))) +(rule + (alias runtest) + (action (diff Parse_error_cident.expected Parse_error_cident.out))) + +(rule + (deps + (glob_files Parse_error_precs_lt.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_precs_lt.out (run %{bin:hocc} -v -s Parse_error_precs_lt))))) +(rule + (alias runtest) + (action (diff Parse_error_precs_lt.expected Parse_error_precs_lt.out))) + +(rule + (deps + (glob_files Parse_error_precs.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_precs.out (run %{bin:hocc} -v -s Parse_error_precs))))) +(rule + (alias runtest) + (action (diff Parse_error_precs.expected Parse_error_precs.out))) + +(rule + (deps + (glob_files Parse_error_of_type_dot.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_of_type_dot.out (run %{bin:hocc} -v -s Parse_error_of_type_dot))))) +(rule + (alias runtest) + (action (diff Parse_error_of_type_dot.expected Parse_error_of_type_dot.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rparen.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rparen.out (run %{bin:hocc} -v -s Parse_error_delimited_rparen))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rparen.expected Parse_error_delimited_rparen.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rcapture.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rcapture.out (run %{bin:hocc} -v -s Parse_error_delimited_rcapture))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rcapture.expected Parse_error_delimited_rcapture.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rbrack.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rbrack.out (run %{bin:hocc} -v -s Parse_error_delimited_rbrack))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rbrack.expected Parse_error_delimited_rbrack.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rarray.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rarray.out (run %{bin:hocc} -v -s Parse_error_delimited_rarray))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rarray.expected Parse_error_delimited_rarray.out))) + +(rule + (deps + (glob_files Parse_error_delimited_rcurly.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_delimited_rcurly.out (run %{bin:hocc} -v -s Parse_error_delimited_rcurly))))) +(rule + (alias runtest) + (action (diff Parse_error_delimited_rcurly.expected Parse_error_delimited_rcurly.out))) + +(rule + (deps + (glob_files Parse_error_code.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_code.out (run %{bin:hocc} -v -s Parse_error_code))))) +(rule + (alias runtest) + (action (diff Parse_error_code.expected Parse_error_code.out))) + +(rule + (deps + (glob_files Parse_error_prod_param_type.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_prod_param_type.out (run %{bin:hocc} -v -s Parse_error_prod_param_type))))) +(rule + (alias runtest) + (action (diff Parse_error_prod_param_type.expected Parse_error_prod_param_type.out))) + +(rule + (deps + (glob_files Parse_error_reduction_arrow.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_reduction_arrow.out (run %{bin:hocc} -v -s Parse_error_reduction_arrow))))) +(rule + (alias runtest) + (action (diff Parse_error_reduction_arrow.expected Parse_error_reduction_arrow.out))) + +(rule + (deps + (glob_files Parse_error_nonterm_cce.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_nonterm_cce.out (run %{bin:hocc} -v -s Parse_error_nonterm_cce))))) +(rule + (alias runtest) + (action (diff Parse_error_nonterm_cce.expected Parse_error_nonterm_cce.out))) + +(rule + (deps + (glob_files Parse_error_hocc.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_hocc.out (run %{bin:hocc} -v -s Parse_error_hocc))))) +(rule + (alias runtest) + (action (diff Parse_error_hocc.expected Parse_error_hocc.out))) + +(rule + (deps + (glob_files Parse_error_hmhi.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Parse_error_hmhi.out (run %{bin:hocc} -v -s Parse_error_hmhi))))) +(rule + (alias runtest) + (action (diff Parse_error_hmhi.expected Parse_error_hmhi.out))) + +(rule + (deps + (glob_files Example1.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example1.out (run %{bin:hocc} -v -s Example1))))) +(rule + (alias runtest) + (action (diff Example1.expected Example1.out))) + +(rule + (deps + (glob_files Hocc.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Hocc.out (run %{bin:hocc} -v -s Hocc))))) +(rule + (alias runtest) + (action (diff Hocc.expected Hocc.out))) + +(rule + (deps + (glob_files A.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to A.out (run %{bin:hocc} -v -s A))))) +(rule + (alias runtest) + (action (diff A.expected A.out))) + +(rule + (deps + (glob_files B.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to B.out (run %{bin:hocc} -v -s B))))) +(rule + (alias runtest) + (action (diff B.expected B.out))) + +(rule + (deps + (glob_files C.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to C.out (run %{bin:hocc} -v -s C))))) +(rule + (alias runtest) + (action (diff C.expected C.out))) + +(rule + (deps + (glob_files D.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to D.out (run %{bin:hocc} -v -s D))))) +(rule + (alias runtest) + (action (diff D.expected D.out))) + +(rule + (deps + (glob_files E.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to E.out (run %{bin:hocc} -v -s E))))) +(rule + (alias runtest) + (action (diff E.expected E.out))) + +(rule + (deps + (glob_files F.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to F.out (run %{bin:hocc} -v -s F))))) +(rule + (alias runtest) + (action (diff F.expected F.out))) + +(rule + (deps + (glob_files M.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to M.out (run %{bin:hocc} -v -s M))))) +(rule + (alias runtest) + (action (diff M.expected M.out))) + +(rule + (deps + (glob_files N.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to N.out (run %{bin:hocc} -v -s N))))) +(rule + (alias runtest) + (action (diff N.expected N.out))) + +(rule + (deps + (glob_files Lyken.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Lyken.out (run %{bin:hocc} -v -s Lyken))))) +(rule + (alias runtest) + (action (diff Lyken.expected Lyken.out))) diff --git a/bootstrap/test/hocc/help_a.expected b/bootstrap/test/hocc/help_a.expected new file mode 100644 index 000000000..496b871c4 --- /dev/null +++ b/bootstrap/test/hocc/help_a.expected @@ -0,0 +1,25 @@ +hocc usage: hocc + +Options: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text format + to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -c[anonical] : Generate a canonical LR(1) parser rather than a compact + LR(1) parser. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write it + to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". +-d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". diff --git a/bootstrap/test/hocc/help_b.expected b/bootstrap/test/hocc/help_b.expected new file mode 100644 index 000000000..27fcf25c8 --- /dev/null +++ b/bootstrap/test/hocc/help_b.expected @@ -0,0 +1,26 @@ +hocc: Invalid command line parameter: "-no-such-option" +hocc usage: hocc + +Options: + -h[elp] : Print command usage and exit. + -v[erbose] : Print progress information during parser generation. + -txt | -text : Write a detailed automoton description in plain text format + to "/hocc/.txt". + -html : Write a detailed automoton description in internally + hyperlinked HTML format to "/hocc/.html". + -hmh | -hocc : Write a complete grammar specification in hocc format to + "/hocc/.hmh", but with all non-terminal + types and reduction code omitted. + -c[anonical] : Generate a canonical LR(1) parser rather than a compact + LR(1) parser. + -hm | -hemlock : Generate a Hemlock-based parser implementation and write it + to "/.hm[i]". + -ml | -ocaml : Generate an OCaml-based parser implementation and write it + to "/.ml[i]". This is brittle + functionality intended only for Hemlock bootstrapping. + -s[rc] : Path and module name of input source, where inputs match + ".hmh[i]" and "" comprises the source directory + and module name, "[/]". +-d[stdir] : Path to directory in which to place generated output, such + that output file paths match "/[hocc/].*". + Defaults to "". diff --git a/doc/design/index.md b/doc/design/index.md index 53cb56aeb..bad36e84a 100644 --- a/doc/design/index.md +++ b/doc/design/index.md @@ -1,4 +1,4 @@ -# Hemlock +# Hemlock Design [Hemlock](https://github.com/BranchTaken/Hemlock) is a systems programming language, but not all systems are alike. Hemlock is intentionally constrained to excel for a (large) subset of possible diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 000000000..0e970b794 --- /dev/null +++ b/doc/index.md @@ -0,0 +1,5 @@ +# Hemlock Documentation + +- [Design](design/index.md) +- Tools + + [`hocc`](tools/hocc.md) diff --git a/doc/tools/hocc.md b/doc/tools/hocc.md new file mode 100644 index 000000000..c95e1e84e --- /dev/null +++ b/doc/tools/hocc.md @@ -0,0 +1,898 @@ +# hocc + +`hocc` is an [LR(1) parser generator](https://en.wikipedia.org/wiki/Canonical_LR_parser). Its name +carries on a long tradition, to wit: + +- [`yacc`](https://en.wikipedia.org/wiki/Yacc) stands for "Yet Another Compiler Compiler". Clearly + the name derives from "yack", as in, "Chuck's dinner didn't sit well and he yacked it." +- `hocc` stands for "Hardly Original Compiler Compiler". The name derives from "hock", as in, "Hank + hocked a loogie." + +Both programs interpret high-level human-written parser descriptions and produce output unfit for +human consumption. However `hocc` has several distinguishing features relative to `yacc`, aside from +interoperating with [Hemlock](https://github.com/BranchTaken/Hemlock) rather than +[C](https://en.wikipedia.org/wiki/The_C_Programming_Language). + +- `hocc` generates LR(1) rather than [LALR(1)](https://en.wikipedia.org/wiki/LALR_parser) parsers + using a fast behavior-preserving compaction algorithm[^pager1977] that reduces the state machine + size relative to the canonical LR(1) algorithm[^knuth1965]. +- `hocc`'s precedence facilities are more precise and easier to use without inadvertently masking + grammar ambiguities. Whereas `yacc` supports only a single linear precedence ordering, `hocc` + supports arbitrarily many directed acyclic precedence graphs. Given this more powerful conflict + resolution mechanism, `hocc` refuses to generate parsers for ambiguous grammars. +- `hocc` supports an automated error recovery algorithm[^diekmann2020] based on minimum-cost repair + sequences. + +## Command usage + +`hocc ` + +Options: + +- `-h[elp]`: Print command usage and exit. +- `-v[erbose]`: Print progress information during parser generation. +- `-txt` | `-text`: Write a detailed automoton description in plain text format to + `/hocc/.txt`. +- `-html`: Write a detailed automoton description in internally hyperlinked HTML format to + `/hocc/.html`. +- `-hmh` | `-hocc`: Write a complete grammar specification in `hocc` format to + `/hocc/.hmh`, but with all non-terminal types and reduction code omitted. +- `-c[anonical]`: Generate a canonical LR(1) parser rather than a compact LR(1) parser. +- `-hm` | `-hemlock`: Generate a Hemlock-based parser implementation and write it to + `/.hm[i]`. +- `-ml` | `-ocaml`: Generate an OCaml-based parser implementation and write it to + `/.ml[i]`. This is brittle functionality intended only for Hemlock + bootstrapping. +- `-s[rc] `: Path and module name of input source, where inputs match `.hmh[i]` and + `` comprises the source directory and module name, `[/]`. +- `-d[stdir] `: Path to directory in which to place generated output, such that output file + paths match `/[hocc/].*`. Defaults to ``. + +Syntax errors in the input file may prevent file generation. Specification errors do not prevent +report and graph file generation, but all specification errors must be resolved for parser +generation to succeed. Some syntax errors in the embedded Hemlock code may pass through `hocc` +unnoticed. + +Example invocations: + +- `hocc -hm -src Parser`: Read `Parser.hmh[i]` and generate `Parser.hm[i]`. +- `hocc -verbose -report -graph -hocc -hemlock -src src/Parser -d obj`: Verbosely read + `src/Parser.hmh[i]` and generate `obj/hocc/Parser.{report,dot,hmh}` and `obj/Parser.{hm,hmi}`. + +## Generated API + +The generated parser is encapsulated in a module with an interface similar to the following. The +interface is mainly relevant to application code which utilizes the generated parser rather than the +specification itself, with the exception that non-terminals may need to refer to the `Token.t` type. +Note that the `EPSILON` token identifier is reserved as the token associated with the start state at +the base of the parser stack; it remains on the stack until parsing accepts and is therefore visible +to introspection at any intermediate parse state. The `OMEGA` token identifier is reserved as a +pseudo-token which terminates start symbols; although it is never constructed, it can appear in +lookahead sets and is therefore exposed for parser introspection purposes. + +The generated parser intentionally does not support effects in reduction code, so that intermediate +parser states can be used as persistent reusable snapshots. + +```hemlock +{ + Spec = { + Assoc = { + type t: t = + | Left + | Right + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Set.t uns # Indices in `precs` array of dominator precedences. + } + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Token = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + alias: option string + prec: option Prec.t + } + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Prod = { + type t: t = { + index: uns # Index in `reductions` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + } + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Nonterm = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + start: bool + prec: option Prec.t + prods: Set.t Prod.t + } + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + symbols: array Symbol.t + [@@doc "Array of symbols, where each element's `index` field corresponds to the element's + array index."] + + Item = { + type t: t = { + prod: Prod.t + dot_pos: uns + lookahead: Set.t Token.t + } + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + ItemSet = { + type t: t = { + kernel: Set.t Item.t + added: Set.t Item.t + } + + hash_map: t -> Hash.State.t -> Hash.State.t + cmp: t -> t -> Cmp.t + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + Action = { + type t: t = + | Shift of uns + | Reduce of uns + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + State = { + type t: t = { + index: uns # Index in `states` array. + itemset: ItemSet.t + actions: Map.t Symbol.t Action.t Symbol + goto: Map.t Token.t uns Token + } + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `index` field corresponds to the + element's array index."] + } + + Token = { + type t: t = + # One variant per `token` statement, e.g. `A` and `B`. + | A of TypeA.t + | B of TypeB.t + | EPSILON of unit # Reserved. + | OMEGA of unit # Reserved. + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.Token.t + } + + Nonterm = { + type t: t = + # One variant per `nonterm`/`start` statement, e.g. `S` and `N`. + | S of TypeS.t + | N of TypeN.t + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.Nonterm.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + State = { + type t: t = uns + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + + spec: t -> Spec.State.t + } + + type stack_elm: stack_elm = (Symbol.t, State.t) + type stack: stack = list stack_elm + type reduction: reduction = stack -> stack + + reductions: array reduction + [@@doc "Array of reductions, where each element's `index` field corresponds to the element's + array index."] + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of (Token.t, State.t) + | ShiftAccept of (Token.t, State.t) + | Reduce of reduction + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. + | Reject of Token.t # Syntax error due to unexpected token. + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + } + + type t: t = { + stack: stack + status: status + } + + Start = { + # One submodule per `start` symbol, e.g. `S`. + S = { + boi: t + } + } + + feed: Token.t -> t -> t + [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, + `Reject`}. `t.status` must be `Prefix`."] + + step: t -> t + [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must + be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."] + + next: -> Token.t -> t -> t + [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a + result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."] + } +``` + +## Parser specification + +The `hocc` specification grammar is layered onto Hemlock's grammar via the addition of several +keywords: + +- Parser: `hocc` +- Symbols: + + [Tokens](#tokens): `token` + + [Non-terminals](#non-terminals): `nonterm`, `start` + + [Productions](#productions): `epsilon` +- [Precedence](#precedence): `prec`, `left`, `right` + +A valid parser specification is encapsulated by a `hocc` statement and describes how to construct a +parse tree of symbols. `token` statements correspond to terminal symbols, i.e. leaf nodes in the +parse tree, whereas non-terminal `start`/`nonterm` statements correspond to internal nodes in the +parse tree. A parse tree always has a non-terminal start symbol at its root. Non-terminals have +associated production patterns that specify how to construct non-terminal nodes during post-order +tree construction. Precedences may be declared via the `prec`/`left`/`right` statements and symbols +may be assigned those precedences for use during conflict resolution. + +The following subsections document specification semantics. See the `hocc` [grammar](#grammar) +specification for comprehensive syntax details. + +### Tokens + +Token identifiers match `[_]*[A-Z][A-Za-z0-9_']*` in conformance with Hemlock's capitalized +identifier syntax. By convention the `hocc` documentation restricts token identifiers to +`[A-Z][A-Z0-9_]*` to distinguish tokens from non-terminals, but other conventions can work just as +well. + +```hocc +hocc + token SOME_TOKEN +``` + +In practice, many token types serve as punctuation and have invariant contents. These token types +can be declared with a string alias, which can then be used in production patterns. + +```hocc +hocc + token LPAREN "(" +``` + +Tokens with variant contents must have a declared data type, since the implicit token data type is +`Unit.t`. The data type must be of the form `.`, where the module provides +`hash_fold`, `cmp`, and `pp` functions in support of the type. All of the `Basis` modules which +implement data types meet these requirements. + +```hocc +hocc + token VALUE of Zint.t +``` + +Tokens may be assigned [precedence](#precedence) to aid in conflict resolution. + +```hocc +hocc + left p + token X prec p +``` + +### Non-terminals + +Non-terminal identifiers match `[_]*[A-Z][A-Za-z0-9_']*` in conformance with Hemlock's capitalized +identifier syntax. By convention the `hocc` documentation restricts non-terminal identifiers to +`[A-Z][A-Za-z0-9]*` to distinguish non-terminals from tokens, but other conventions can work just as +well. + +```hocc +hocc + nonterm SomeNonterm ::= # [...] + start SomeStart ::= # [...] +``` + +As for tokens, non-terminals with variant contents must have a declared data type, since the +implicit non-terminal data type is `Unit.t`. A parser which universally utilizes implicitly typed +non-terminals does not construct a parse tree, but it may still be useful as a recognizer, or as an +abstract grammar specification which `hocc` can verify without generating a parser. + +```hocc +hocc + nonterm SomeNonterm of Node.t ::= # [...] + start SomeStart of Node.t ::= # [...] +``` + +#### Productions + +Each non-terminal symbol has one or more associated productions, which denote patterns for combining +symbols to construct a symbol during post-order parse tree construction. + +```hocc +hocc + token SOME_TOKEN + nonterm SomeNonterm ::= SOME_TOKEN + start SomeStart ::= SomeNonterm SOME_TOKEN +``` + +As a special case, the `epsilon` keyword denotes an empty pattern which can be used to construct a +non-terminal without combining any symbols. + +```hocc +hocc + token A + token B + nonterm N ::= + | A + | epsilon + start S ::= N B +``` + +All of the above examples use non-terminals of implicit data type `Unit.t`, which also implies +trivial `()` "reduction" code. This can be written explicitly. + +```hocc +hocc + token A + token B + nonterm N of Unit.t ::= + | A + | epsilon + -> () + start S of Unit.t ::= N B -> () +``` + +Parsers which construct a parse tree may need to associate production-specific reduction code rather +than sharing the reduction code with all of a non-terminal's productions. As for Hemlock pattern +matching, all productions which share reduction code must specify equivalently typed lexical +bindings. + +```hocc +hocc + token U of Uns.t + token PLUS "+" + start S of Uns.t ::= + | PLUS u1:U u2:U + | u1:U "+" u2:U + | u1:U u2:U _:PLUS -> + u1 + u2 +``` + +Ordinarily, the characteristic finite state machine (CFSM) corresponding to an LR(1) grammar delays +each transition until the lookahead symbol becomes available. However this poses a challenge for +start symbols because there is no concrete lookahead symbol past the end of input. The following +invalid grammar would infinitely recurse. + +```hocc +# Invalid (infinite recursion). +hocc + token U of Uns.t + start S of Uns.t ::= + | u:U s:S -> u + s + | epsilon -> 0 +``` + +A typical solution to this challenge is to require the application to signal end of input to the +CFSM via a dedicated API. However `hocc` uses the same approach as Menhir [[^fpottier]] and instead +proactively (transitively) reduces when the current symbol unambiguously delimits a valid start +symbol reduction. Some start symbols may trivially meet the requirements for proactive reduction, +e.g. for a grammar which incrementally parses a file comprising newline-separated statements, each +of which is encapsulated by a start symbol. However, many grammars do need to manually incorporate +an explicit end of input token. The above grammar can be repaired as follows. + +```hocc +# Valid, though right-associative. +hocc + token U of Uns.t + token EOI + start S of Uns.t ::= + | u:U s:S -> u + s + | EOI -> 0 +``` + +Note that the above grammar is right-associative only because the `EOI` repair is simpler to +demonstrate in that context. The following left-associative grammar is preferable in practice +because it incrementally constructs the parse tree rather than delaying all reductions until `EOI`. + +```hocc +# Valid and left-associative. +hocc + token U of Uns.t + token EOI + start S of Uns.t ::= + | s:S u:U -> s + u + | s:S EOI -> s + | epsilon -> 0 +``` + +### Precedence + +A parser specification may contain conflicts wherein a parser state encodes multiple valid actions +for one or more inputs. `hocc` refuses to generate parsers which contain unresolved conflicts. +Parser specifications can often be refactored or expanded to eliminate conflicts, but such revisions +may reduce clarity and maintainability. Precedences provide a mechanism for conflict resolution, +i.e. explicit choice of actions. `hocc` attempts to resolve conflicts based on the precedences +assigned to tokens and productions. + +Each production can specify its precedence, or if all of a non-terminal's productions are to have +the same precedence, the precedence can be more succinctly specified for the non-terminal as a +whole. It is an error to explicitly specify both a non-terminal's precedence and the precedence of +any of its productions. + +Precedences may be defined with any of the following associativities: + +- `prec`: Do not resolve conflicts via associativity. Non-associativity is useful for specifying + precedence-based resolutions without inadvertently masking conflicts. +- `left`: Resolve shift/reduce conflicts by reducing. This induces left associativity, e.g. + `2 + 3 + 4` is parsed as `(2 + 3) + 4`. +- `right`: Resolve shift/reduce conflicts by shifting. This induces right associativity, e.g. + `2 + 3 + 4` is parsed as `2 + (3 + 4)`. All else being equal, prefer left associativity to + minimize intermediate parser state. + +Precedences can be defined via the `prec`, `left`, and `right` statements, and they may optionally +be ordered via `<` relationships with previously defined precedences, irrespective of associativity. +These precedence relationships are used to compute the transitive closure of precedence orderings. +Precedences with disjoint relationships are incomparable, i.e. they have no relative ordering. By +default, all tokens and productions have a *lack* of precedence, which is equivalent to each such +token/production being assigned a unique disjoint `prec` precedence. + +Conflicts may occur between two or more actions, of which at least one is a reduce action. Such an +action set induces shift/reduce and/or reduce/reduce conflicts; by construction shift/shift +conflicts cannot occur. Given conflicting actions A and B, A "dominates" B if A is preferred over B. +For conflict resolution to succeed, one action must dominate all other conflicting actions. The +rules for conflict resolution are as follows. If none of the rules apply, conflict resolution fails. + +- If one action has higher precedence than all other actions, that action dominates. +- If a subset of actions has higher precedence than all other actions, and the actions in the + highest-precedence subset have equal associativity, associativity resolves the conflict under + either of the following circumstances: + + `left`: A single reduce action dominates shift actions. + + `right`: Shift actions dominate one or more reduce actions. + +Associativity suffices for resolving simple shift/reduce conflicts as in e.g. `2 + 3 + 4`, so that +it is deterministically parsed as `(2 + 3) + 4` (as in the following example specification) or +`2 + (3 + 4)`. + +```hocc +hocc + left add + token PLUS prec add + token INT of Int.t + nonterm Expr of Int.t ::= + | x:INT -> x + | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1) +``` + +Alternatively, precedence ordering can resolve shift/reduce conflicts, though associativity is +preferable when applicable. + +```hocc +hocc + prec add + prec plus < add + token PLUS prec plus + token INT of Int.t + nonterm Expr of Int.t ::= + | x:INT -> x + | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1) +``` + +Precedence ordering can also resolve reduce/reduce conflicts between productions, which is beyond +the power of associativity. In the following parser specification, `MUL` has precedence over `PLUS` +due to the precedence relationship `add < mul`, so `2 + 3 * 4` is parsed as `2 + (3 * 4)`. + +```hocc +hocc + left mul + token MUL prec mul + left add < mul + token PLUS prec add + token INT of Int.t + nonterm Expr of Int.t ::= + | e0:Expr MUL e1:Expr prec mul -> Int.(e0 + e1) + | e0:Expr PLUS e1:Expr prec add -> Int.(e0 + e1) + | x:INT -> x +``` + +Precedence relationships are optional in precedence declarations. Contrived examples follow. + +```hocc +hocc + left a + left b < a + left c < a + left d < b, c # Transitive: a + right e + prec f < d, e # Transitive: a, b, c +``` + +Precedences are bound to tokens, non-terminals, and productions using the optional `prec` reference +clause. Omitting the `prec` reference clause is equivalent to referring to a unique disjoint `prec` +precedence. The following example demonstrates the `prec` clause syntax. + +```hocc +hocc + prec p1 + left p2 + + token FOO prec p1 + + nonterm Bar prec p2 + | FOO + | epsilon + + start Biz + | Bar FOO prec p1 +``` + +The `OMEGA` token is implicitly defined with the reserved precedence `omega` as follows to +facilitate manual conflict resolution. + +```hocc +hocc + prec omega + token OMEGA "ω" prec omega +``` + +## Example + +The following example implements a simple mathematical expression calculator. + +`Example.hmhi`: + +```hocc +open import Basis + +# Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a +# module signature. +include hocc + +calulate: string -> zint + [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers + and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."] +``` + +`Example.hmh`: + +```hocc +open import Basis + +# Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. +include hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr -> + match op with + | MulOp STAR -> Zint.(e0 * e1) + | MulOp SLASH -> Zint.(e0 / e1) + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | AddOp PLUS -> Zint.(e0 + e1) + | AddOp MINUS -> Zint.(e0 - e1) + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> String.length s <> 0) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let parser' = Start.Answer.next tok parser + let done = match status parser' with + | Prefix -> false + | Accept _ + | Error _ -> true + parser', done + |> function + | Accept answer -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" +``` + +To generate Hemlock code from the above inputs, run `hocc -hm -s Example`. + +### Grammar + +The `hocc` specification language grammar is equivalent to the following specification. + +```hocc +hocc + # hocc-specific keywords + token HOCC "hocc" + token NONTERM "nonterm" + token EPSILON "epsilon" + token START "start" + token PREC "prec" + token LEFT "left" + token RIGHT "right" + + # Identifiers + token UIDENT # Uncapitalized + token CIDENT # Capitalized + token USCORE "_" + + # Token alias + token STRING + + # Punctuation/separators + token COLON_COLON_EQ "::=" + token OF "of" + token DOT "." + token ARROW "->" + token BAR "|" + token LT "<" + token COMMA "," + token SEMI ";" + token LINE_DELIM + + # Left-right paired delimiters + token INDENT + token DEDENT + token LPAREN "(" + token RPAREN ")" + token LCAPTURE "(|" + token RCAPTURE "|)" + token LBRACK "[" + token RBRACK "]" + token LARRAY "[|" + token RARRAY "|]" + token LCURLY "{" + token RCURLY "}" + + # Miscellaneous Hemlock token in embedded code + token CODE_TOKEN + + # End of input, used to terminate start symbols + token EOI + + nonterm Ident ::= UIDENT | CIDENT | "_" + + nonterm PrecsTl ::= + | "," UIDENT PrecsTl + | epsilon + + nonterm Precs ::= UIDENT PrecsTl + + nonterm PrecRels ::= + | "<" Precs + | epsilon + + nonterm PrecType ::= "prec" | "left" | "right" + + nonterm Prec ::= PrecType UIDENT PrecRels + + nonterm OfType ::= "of" CIDENT "." UIDENT + + nonterm OfType0 ::= + | OfType + | epsilon + + nonterm PrecRef ::= + | "prec" UIDENT + | epsilon + + nonterm TokenAlias ::= + | STRING + | epsilon + + nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + + nonterm Sep ::= LINE_DELIM | ";" | "|" + + nonterm CodesTl ::= + | Sep Code CodesTl + | epsilon + + nonterm Codes ::= Code CodesTl + + nonterm Codes0 ::= + | Codes + | epsilon + + nonterm Delimited ::= + | INDENT Codes DEDENT + | "(" Codes0 ")" + | "(|" Codes0 "|)" + | "[" Codes0 "]" + | "[|" Codes0 "|]" + | "{" Codes0 "}" + + nonterm CodeTl ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + | epsilon + + nonterm Code ::= + | Delimited CodeTl + | CODE_TOKEN CodeTl + + nonterm ProdParamType ::= + | CIDENT + | STRING + + nonterm ProdParamIdent ::= + | Ident ":" + | epsilon + + nonterm ProdParam ::= ProdParamIdent ProdParamType + + nonterm ProdParamsTl ::= + | ProdParam ProdParamsTl + | epsilon + + nonterm ProdParams ::= ProdParam ProdParamsTl + + nonterm ProdPattern ::= + | ProdParams + | "epsilon" + + nonterm Prod ::= ProdPattern PrecRef + + nonterm ProdsTl ::= + | "|" Prod ProdsTl + | epsilon + + nonterm Prods ::= + | "|" Prod ProdsTl + | Prod ProdsTl + + nonterm Reduction ::= Prods "->" Code + + nonterm ReductionsTl ::= + | "|" Reduction ReductionsTl + | epsilon + + nonterm Reductions ::= + | "|" Reduction ReductionsTl + | Reduction ReductionsTl + + nonterm NontermType ::= "nonterm" | "start" + + nonterm Nonterm ::= + | NontermType CIDENT OfType PrecRef "::=" Reductions + | NontermType CIDENT PrecRef "::=" Prods + + nonterm Stmt ::= + | Prec + | Token + | Nonterm + | Code + + nonterm StmtsTl ::= + | LINE_DELIM Stmt StmtsTl + | epsilon + + nonterm Stmts ::= Stmt StmtsTl + + nonterm Hocc ::= "hocc" INDENT Stmts DEDENT + + nonterm Matter ::= + | CODE_TOKEN Matter + | epsilon + + start Hmh ::= Matter Hocc Matter EOI + + start Hmhi ::= Matter "hocc" Matter EOI +``` + +## Citations + +[^knuth1965]: + Donald Knuth, + “On the translation of languages from left to right,” + Information and Control 8 (6), 607–639, July 1965. + +[^pager1977]: + David Pager, + “A Practical General Method for Constructing LR(k) Parsers,” + Acta Informatica 7, 249-268, 1977. + +[^diekmann2020]: + Lukas Diekmann and Laurence Tratt, + “Don't Panic! Better, Fewer, Syntax Errors for LR Parsers,” + 34th European Conference on Object-Oriented Programming (ECOOP 2020), Article No. 6, pages 6:1–6:32. + +[^fpottier]: + François Pottier and Yann Régis-Gianas, + “Menhir LR(1) Parser Generator,” + http://gallium.inria.fr/~fpottier/menhir/ diff --git a/ide/kakoune/hocc.kak b/ide/kakoune/hocc.kak new file mode 100644 index 000000000..24cc45803 --- /dev/null +++ b/ide/kakoune/hocc.kak @@ -0,0 +1,122 @@ +# https://github.com/BranchTaken/Hemlock +# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +# Detection +# ‾‾‾‾‾‾‾‾‾ + +hook global BufCreate .*\.hmhi? %{ + set-option buffer filetype hocc +} + +# Initialization +# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +hook global WinSetOption filetype=hocc %{ + require-module hocc + set-option window static_words %opt{hocc_static_words} +} + +hook -group hocc-highlight global WinSetOption filetype=hocc %{ + add-highlighter window/hocc ref hocc + hook -once -always window WinSetOption filetype=.* %{ remove-highlighter window/hocc } +} + +provide-module hocc %{ + +# Highlighters +# ‾‾‾‾‾‾‾‾‾‾‾‾ + + +add-highlighter shared/hocc regions +add-highlighter shared/hocc/code default-region group + +add-highlighter shared/hocc/code/cident regex \b[_]*[A-Z][A-Za-z0-9_']*\b 0:module +add-highlighter shared/hocc/code/uident regex \b[_]*[a-z][A-Za-z0-9_']*\b 0:Default +add-highlighter shared/hocc/code/tab regex \t 0:Error +add-highlighter shared/hocc/code/unaligned regex ^(\ \ )*\ (?![\ ]) 0:Error +add-highlighter shared/hocc/code/unaligned_continue_keyword regex ^(\ \ \ \ )*(and|also|as|else|external|of|or|then|when|with)\b 0:Error +add-highlighter shared/hocc/code/unaligned_continue_punctuation regex ^(\ \ \ \ )*([\x7D\]),!'\\\-+*/%@$<=>\|:.]) 0:Error +add-highlighter shared/hocc/code/unaligned_continue_caret regex ^(\ \ \ \ )*([\^](?![&A-Za-z_])) 0:Error +add-highlighter shared/hocc/code/trailing regex (\ )+$ 0:ExcessWhitespace +add-highlighter shared/hocc/code/interior_multispace regex (?<=\S)(\ ){2,}(?=\S) 0:ExcessWhitespace + +add-highlighter shared/hocc/comment region -recurse \Q(* \Q(* \Q*) fill comment +add-highlighter shared/hocc/line_comment region '#' '\n' fill comment + +add-highlighter shared/hocc/string region ((?]?(\+|_)?#?0?\*\(\^ () fill meta +add-highlighter shared/hocc/string/precision region \%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?\.=?\*\(\^ () fill meta +add-highlighter shared/hocc/string/fmt region \%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/value region \%('.')?[<^>]?(\+|_)?#?0?([1-9][0-9]*)?(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta + +add-highlighter shared/hocc/string/width_precision region \^\)\.=?\*\(\^ () fill meta +add-highlighter shared/hocc/string/width_fmt region \^\)(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/width_value region \^\)(\.=?[1-9][0-9]*)?[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta +add-highlighter shared/hocc/string/precision_fmt region \^\)[bodx]?[mac]?p?f\(\^ () fill meta +add-highlighter shared/hocc/string/precision_value region \^\)[bodx]?[mac]?p?([bnzcs]|([ui](8|16|32|64|128|256|512)?)|(r(32|64)?))([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta +add-highlighter shared/hocc/string/fmt_value region \^\)([\ ]*[-+*/%@^$<=>|:.][-+*/%@$<=>|:.~?]*[\ ]*)?\(\^ () fill meta + +add-highlighter shared/hocc/string/unprotected region (?|:.~?]*} 0:operator +add-highlighter shared/hocc/code/infix_operator regex %{[-+*/%@^$<=>|:.][-+*/%@^$<=>|:.~?]*} 0:operator + +add-highlighter shared/hocc/code/boolean regex \b(true|false)\b 0:value + +add-highlighter shared/hocc/code/bin_integer regex \b(0b)([_]*[01][01_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_integer regex \b(0o)([_]*[0-7][0-7_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_integer regex \b(0x)([_]*[0-9a-f][0-9a-f_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/integer regex \b(([1-9][0-9_]*)|0[_]*)(([ui](8|16|32|64|128|256|512)?)|[zn])?\b 1:value 3:attribute + +add-highlighter shared/hocc/code/bin_real_dot regex \b(0b)([01][01_]*\.(?!\.)[01_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/bin_real_p regex \b(0b)([01][01_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/bin_real_r regex \b(0b)([01][01_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/oct_real_dot regex \b(0o)([0-7][0-7_]*\.(?!\.)[0-7_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_real_p regex \b(0o)([0-7][0-7_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/oct_real_r regex \b(0o)([0-7][0-7_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/hex_real_dot regex \b(0x)([0-9a-f][0-9a-f_]*\.(?!\.)[0-9a-f_]*(p_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_real_p regex \b(0x)([0-9a-f][0-9a-f_]*p_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:attribute 2:value 3:attribute +add-highlighter shared/hocc/code/hex_real_r regex \b(0x)([0-9a-f][0-9a-f_]*)(r(32|64)?)\b 1:attribute 2:value 3:attribute + +add-highlighter shared/hocc/code/real_dot regex \b([0-9][0-9_]*\.(?!\.)[0-9_]*(e_*[+\-]?_*[0-9][0-9_]*)?)(r(32|64)?)? 1:value 2:attribute +add-highlighter shared/hocc/code/real_e regex \b([0-9][0-9_]*e_*[+\-]?_*[0-9][0-9_]*)(r(32|64)?)?\b 1:value 2:attribute +add-highlighter shared/hocc/code/real_r regex \b([0-9][0-9_]*)(r(32|64)?)\b 1:value 2:attribute + +# Macro +# ‾‾‾‾‾ + +evaluate-commands %sh{ + keywords="and|also|as|conceal|effect|else|expose|external|fn|function|if|import|include|lazy|let" + keywords="${keywords}|match|mutability|of|open|or|rec|then|type|when|with" + keywords="${keywords}|hocc|token|nonterm|start|epsilon|prec|left|right" + + printf %s\\n "declare-option str-list hocc_static_words ${keywords}" | tr '|' ' ' + + printf %s " + add-highlighter shared/hocc/code/ regex \b(${keywords})\b 0:keyword + " +} + +}