From 68f651c30c25544bd349363b45387a1f722c8591 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 18 Jul 2024 12:10:46 -0700 Subject: [PATCH] Implement hocc Hemlock/OCaml code generation --- .../bin/hocc/{reduction.ml => callback.ml} | 19 +- bootstrap/bin/hocc/callback.mli | 72 + bootstrap/bin/hocc/callbacks.ml | 16 + bootstrap/bin/hocc/callbacks.mli | 21 + bootstrap/bin/hocc/code.ml | 3746 +++++++++++++++++ bootstrap/bin/hocc/code.mli | 19 + bootstrap/bin/hocc/conf.ml | 11 +- bootstrap/bin/hocc/description.ml | 519 +++ bootstrap/bin/hocc/description.mli | 7 + bootstrap/bin/hocc/grammar.ml | 27 + bootstrap/bin/hocc/grammar.mli | 4 + bootstrap/bin/hocc/hocc.ml | 50 +- bootstrap/bin/hocc/io.ml | 41 +- bootstrap/bin/hocc/io.mli | 6 +- bootstrap/bin/hocc/lr0Itemset.ml | 10 +- bootstrap/bin/hocc/lr1Itemset.ml | 9 + bootstrap/bin/hocc/lr1Itemset.mli | 4 + bootstrap/bin/hocc/parse.ml | 682 ++- bootstrap/bin/hocc/prec.ml | 2 +- bootstrap/bin/hocc/prec.mli | 4 +- bootstrap/bin/hocc/precs.mli | 2 +- bootstrap/bin/hocc/prod.ml | 12 +- bootstrap/bin/hocc/prod.mli | 8 +- bootstrap/bin/hocc/prods.ml | 4 +- bootstrap/bin/hocc/prods.mli | 4 +- bootstrap/bin/hocc/qualifiedType.ml | 99 +- bootstrap/bin/hocc/qualifiedType.mli | 33 +- bootstrap/bin/hocc/reduction.mli | 64 - bootstrap/bin/hocc/reductions.ml | 16 - bootstrap/bin/hocc/reductions.mli | 20 - bootstrap/bin/hocc/scan.ml | 10 +- bootstrap/bin/hocc/scan.mli | 1 + bootstrap/bin/hocc/spec.ml | 824 +--- bootstrap/bin/hocc/spec.mli | 33 +- bootstrap/bin/hocc/state.ml | 3 + bootstrap/bin/hocc/state.mli | 4 + bootstrap/bin/hocc/symbol.ml | 13 +- bootstrap/bin/hocc/symbol.mli | 9 +- bootstrap/bin/hocc/symbols.ml | 3 + bootstrap/bin/hocc/symbols.mli | 14 +- bootstrap/test/hocc/Binding_error.expected | 3 + bootstrap/test/hocc/Binding_error.hmh | 8 + bootstrap/test/hocc/Example.expected | 2 + bootstrap/test/hocc/Example.expected.hm | 1408 +++++++ bootstrap/test/hocc/Example.expected.hmi | 260 ++ bootstrap/test/hocc/Example.expected.txt | 2 +- bootstrap/test/hocc/Example.hmh | 31 +- bootstrap/test/hocc/Example.hmhi | 2 +- bootstrap/test/hocc/Example_b.expected | 13 + bootstrap/test/hocc/Example_b.expected.hm | 1416 +++++++ bootstrap/test/hocc/Example_b.expected.hmi | 258 ++ bootstrap/test/hocc/Example_b.hmh | 74 + bootstrap/test/hocc/Example_b.hmhi | 7 + bootstrap/test/hocc/Example_c.expected | 13 + bootstrap/test/hocc/Example_c.expected.hm | 1410 +++++++ bootstrap/test/hocc/Example_c.expected.hmi | 259 ++ bootstrap/test/hocc/Example_c.hmh | 68 + bootstrap/test/hocc/Example_c.hmhi | 8 + bootstrap/test/hocc/Example_ml.expected | 1 + bootstrap/test/hocc/Example_ml.expected.ml | 1586 +++++++ bootstrap/test/hocc/Example_ml.expected.mli | 262 ++ bootstrap/test/hocc/Example_ml.expected.txt | 197 + bootstrap/test/hocc/Example_ml.hmh | 77 + bootstrap/test/hocc/Example_ml.hmhi | 10 + bootstrap/test/hocc/Example_rno.expected.txt | 2 +- bootstrap/test/hocc/Parse_a.hmh | 2 +- bootstrap/test/hocc/dune | 75 +- bootstrap/test/hocc/help_a.expected | 11 +- bootstrap/test/hocc/help_b.expected | 11 +- doc/tools/hocc.md | 216 +- 70 files changed, 12854 insertions(+), 1283 deletions(-) rename bootstrap/bin/hocc/{reduction.ml => callback.ml} (89%) create mode 100644 bootstrap/bin/hocc/callback.mli create mode 100644 bootstrap/bin/hocc/callbacks.ml create mode 100644 bootstrap/bin/hocc/callbacks.mli create mode 100644 bootstrap/bin/hocc/code.ml create mode 100644 bootstrap/bin/hocc/code.mli create mode 100644 bootstrap/bin/hocc/description.ml create mode 100644 bootstrap/bin/hocc/description.mli create mode 100644 bootstrap/bin/hocc/grammar.ml create mode 100644 bootstrap/bin/hocc/grammar.mli delete mode 100644 bootstrap/bin/hocc/reduction.mli delete mode 100644 bootstrap/bin/hocc/reductions.ml delete mode 100644 bootstrap/bin/hocc/reductions.mli create mode 100644 bootstrap/test/hocc/Binding_error.expected create mode 100644 bootstrap/test/hocc/Binding_error.hmh create mode 100644 bootstrap/test/hocc/Example.expected.hm create mode 100644 bootstrap/test/hocc/Example.expected.hmi create mode 100644 bootstrap/test/hocc/Example_b.expected create mode 100644 bootstrap/test/hocc/Example_b.expected.hm create mode 100644 bootstrap/test/hocc/Example_b.expected.hmi create mode 100644 bootstrap/test/hocc/Example_b.hmh create mode 100644 bootstrap/test/hocc/Example_b.hmhi create mode 100644 bootstrap/test/hocc/Example_c.expected create mode 100644 bootstrap/test/hocc/Example_c.expected.hm create mode 100644 bootstrap/test/hocc/Example_c.expected.hmi create mode 100644 bootstrap/test/hocc/Example_c.hmh create mode 100644 bootstrap/test/hocc/Example_c.hmhi create mode 100644 bootstrap/test/hocc/Example_ml.expected create mode 100644 bootstrap/test/hocc/Example_ml.expected.ml create mode 100644 bootstrap/test/hocc/Example_ml.expected.mli create mode 100644 bootstrap/test/hocc/Example_ml.expected.txt create mode 100644 bootstrap/test/hocc/Example_ml.hmh create mode 100644 bootstrap/test/hocc/Example_ml.hmhi diff --git a/bootstrap/bin/hocc/reduction.ml b/bootstrap/bin/hocc/callback.ml similarity index 89% rename from bootstrap/bin/hocc/reduction.ml rename to bootstrap/bin/hocc/callback.ml index ab5457d39..8bf889181 100644 --- a/bootstrap/bin/hocc/reduction.ml +++ b/bootstrap/bin/hocc/callback.ml @@ -8,7 +8,7 @@ module T = struct binding: string option; symbol_name: string; qtype: QualifiedType.t; - prod_param: Parse.prod_param option; + prod_param: Parse.nonterm_prod_param option; } let hash_fold {binding; symbol_name; _} state = @@ -115,9 +115,10 @@ module T = struct module Index = Uns type t = { index: Index.t; - lhs: QualifiedType.t; + lhs_name: string; + lhs_qtype: QualifiedType.t; rhs: Params.t; - code: Parse.code option; + code: Parse.nonterm_code option; } let hash_fold {index; _} state = @@ -126,10 +127,11 @@ module T = struct let cmp {index=index0; _} {index=index1; _} = Index.cmp index0 index1 - let pp {index; lhs; rhs; code} formatter = + let pp {index; lhs_name; lhs_qtype; rhs; code} formatter = formatter |> Fmt.fmt "{index=" |> Index.pp index - |> Fmt.fmt "; lhs=" |> QualifiedType.pp lhs + |> Fmt.fmt "; lhs_name=" |> String.pp lhs_name + |> Fmt.fmt "; lhs_qtype=" |> QualifiedType.pp lhs_qtype |> Fmt.fmt "; rhs=" |> Params.pp rhs |> Fmt.fmt "; code=" |> (Option.pp Parse.fmt_code) code |> Fmt.fmt "}" @@ -137,5 +139,8 @@ end include T include Identifiable.Make(T) -let init ~index ~lhs ~rhs ~code = - {index; lhs; rhs; code} +let init ~index ~lhs_name ~lhs_qtype ~rhs ~code = + {index; lhs_name; lhs_qtype; rhs; code} + +let is_epsilon {rhs; _} = + Params.is_empty rhs diff --git a/bootstrap/bin/hocc/callback.mli b/bootstrap/bin/hocc/callback.mli new file mode 100644 index 000000000..7ddfde834 --- /dev/null +++ b/bootstrap/bin/hocc/callback.mli @@ -0,0 +1,72 @@ +(** Reduction callback code associated with a production. Conceptually a reduction callback is + simply a block of code, but there is quite a bit of hair related to binding parameters to + production symbols. *) + +open Basis +open Basis.Rudiments + +(** Reduction callback parameter. *) +module Param : sig + type t = { + binding: string option; + (** Optional binding name for reduction callback code. Generated code must specify a binding for + each RHS symbol it needs to access. *) + + symbol_name: string; + (** Symbol name corresponding to a [start]/[nonterm] or [token] declaration. *) + + qtype: QualifiedType.t; + (** Qualified type of parameter, e.g. [explicit_opt=Some {module_:"SomeToken"; type_:"t"}]. *) + + prod_param: Parse.nonterm_prod_param option; + (** Declaration AST. *) + } + + include IdentifiableIntf.S with type t := t + + val init: binding:string option -> symbol_name:string -> qtype:QualifiedType.t + -> prod_param:Parse.nonterm_prod_param option -> t +end + +(** Ordered container of reduction callback parameters. *) +module Params : sig + type t + type elm = Param.t + + include IdentifiableIntf.S with type t := t + include ContainerIntf.SMonoArray with type t := t with type elm := elm + include ContainerIntf.SMonoIndex with type t := t with type elm := elm + + val init: Io.t -> Param.t array -> Io.t * t + val length: t -> uns + val range: t -> range + val get: uns -> t -> Param.t + val map: f:(Param.t -> 'a) -> t -> 'a array +end + +module Index = Uns +type t = { + index: Index.t; + (** Unique reduction callback index. *) + + lhs_name: string; + (** Name of enclosing nonterm. *) + + lhs_qtype: QualifiedType.t; + (** Qualified type of LHS. *) + + rhs: Params.t; + (** RHS parameters. *) + + code: Parse.nonterm_code option; + (** Optional embedded code to be invoked by generated parser. *) +} + +include IdentifiableIntf.S with type t := t + +val init: index:Index.t -> lhs_name:string -> lhs_qtype:QualifiedType.t -> rhs:Params.t + -> code:Parse.nonterm_code option -> t +(** Used only by [Callbacks.init]. *) + +val is_epsilon: t -> bool +(** [is_epsilon t] returns true if [t] is an ε reduction callback, i.e. it has an empty RHS. *) diff --git a/bootstrap/bin/hocc/callbacks.ml b/bootstrap/bin/hocc/callbacks.ml new file mode 100644 index 000000000..c3765bc50 --- /dev/null +++ b/bootstrap/bin/hocc/callbacks.ml @@ -0,0 +1,16 @@ +open Basis +open! Basis.Rudiments + +type t = (Callback.Index.t, Callback.t, Callback.Index.cmper_witness) Ordmap.t + +let empty = Ordmap.empty (module Callback.Index) + +let length = Ordmap.length + +let insert ~lhs:Symbols.{name; qtype; _} ~rhs ~code t = + let index = length t in + let callback = Callback.init ~index ~lhs_name:name ~lhs_qtype:qtype ~rhs ~code in + callback, Ordmap.insert_hlt ~k:index ~v:callback t + +let fold ~init ~f t = + Ordmap.fold ~init ~f:(fun accum (_, callback) -> f accum callback) t diff --git a/bootstrap/bin/hocc/callbacks.mli b/bootstrap/bin/hocc/callbacks.mli new file mode 100644 index 000000000..7777ceacc --- /dev/null +++ b/bootstrap/bin/hocc/callbacks.mli @@ -0,0 +1,21 @@ +(** Collection of all reduction callbacks, with automatic assignment of unique indexes. *) +open! Basis +open! Basis.Rudiments + +type t + +val empty: t +(** [empty] returns an empty set of reduction callbacks. *) + +val insert: lhs:Symbols.info -> rhs:Callback.Params.t -> code:Parse.nonterm_code option -> t + -> Callback.t * t +(** [insert ~lhs ~rhs ~code t] creates a [Callback.t] with unique index and returns both the + reduction callback and a new [t] with the reduction callback inserted. *) + +val length: t -> uns +(** [length t] returns the number of reduction callbacks in [t]. *) + +val fold: init:'accum -> f:('accum -> Callback.t -> 'accum) -> t -> 'accum +(** [fold ~init ~f t] iteratively applies [f] to the reduction callbacks in [t], in increasing index + order. +*) diff --git a/bootstrap/bin/hocc/code.ml b/bootstrap/bin/hocc/code.ml new file mode 100644 index 000000000..1c3cc7eac --- /dev/null +++ b/bootstrap/bin/hocc/code.ml @@ -0,0 +1,3746 @@ +open Basis +open! Basis.Rudiments + +let line_raw_indentation line = + String.C.Slice.fold_until ~init:0L ~f:(fun col cp -> + match cp with + | cp when Codepoint.(cp = of_char ' ') -> succ col, false + | _ -> col, true + ) line + +let line_context_raw_indentation line_context = + let line = + line_context + |> List.map ~f:Hmc.Source.Slice.to_string + |> String.join + |> String.C.Slice.of_string + in + line_raw_indentation line + +let line_context_indentation line_context = + let raw_indentation = line_context_raw_indentation line_context in + (* Continuation lines have an extra 2 spaces; omit them from the result if present. *) + raw_indentation - (raw_indentation % 4L) + +let indentation_of_hocc = function + | Scan.Token.HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.line_context source |> line_context_indentation + +let macro_of_line line = + let open String.C in + let ldangle = Codepoint.kv 0xabL (*'«'*) in + let rdangle = Codepoint.kv 0xbbL (*'»'*) in + match Slice.lfind ldangle line with + | None -> None + | Some base -> begin + let slice = Slice.of_cursors ~base ~past:(Slice.past line) in + match Slice.rfind rdangle slice with + | None -> None + | Some rdangle_base -> begin + let past = Cursor.succ rdangle_base in + let macro = Slice.of_cursors ~base ~past |> Slice.to_string in + Some macro + end + end + +let module_name conf = + Path.Segment.to_string_hlt (Conf.module_ conf) + +let mk_indent indentation = + fun formatter -> formatter |> Fmt.fmt ~width:indentation "" + +let fmt_source_directive indentation source formatter = + let directive_pathstr = + Hmc.Source.Slice.container source + |> Hmc.Source.path + |> Option.value_hlt + |> Path.to_string_hlt + in + let base = Hmc.Source.Slice.base source in + let pos = Hmc.Source.Cursor.pos base in + let line = Text.Pos.line pos in + let col = Text.Pos.col pos in + formatter + |> Fmt.fmt "[:" |> String.pp directive_pathstr + |> Fmt.fmt ":" |> Uns.fmt line + |> Fmt.fmt ":" |> Uns.fmt indentation |> Fmt.fmt "+" |> Uns.fmt (col - indentation) + |> Fmt.fmt "]" + +let expand ~template_indentation template expanders formatter = + formatter + |> (fun formatter -> + let formatter, _first = + String.C.Slice.lines_fold ~init:(formatter, true) ~f:(fun (formatter, first) line -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + let indentation = template_indentation + (line_raw_indentation line) in + match macro_of_line line with + | Some macro -> begin + let expander = Map.get_hlt macro expanders in + formatter |> expander ~indentation + end + | None -> begin + formatter + |> (fun formatter -> + match first, String.C.Slice.length line with + | true, _ + | _, 0L -> formatter + | _, _ -> Fmt.fmt ~width:template_indentation "" formatter + ) + |> Fmt.fmt (String.C.Slice.to_string line) + end + ), + false + ) (String.C.Slice.of_string template) + in + formatter + ) + +let hmi_template = {|{ + Spec = { + Algorithm = { + type t: t = + | Lr1 [@doc "LR(1) algorithm."] + | Ielr1 [@doc "IELR(1) algorithm."] + | Pgm1 [@doc "PGM(1) algorithm."] + | Lalr1 [@doc "LALR(1) algorithm."] + + include IdentifiableIntf.S with type t := t + } + + algorithm: Algorithm.t + [@@doc "Algorithm used to generate parser."] + + Assoc = { + type t: t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness # Indices in `precs` array of dominator + # precedences. + } + + include IdentifiableIntf.S with type t := t + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Prod = { + type t: t = { + index: uns # Index in `prods` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns # Index of reduction callback in `Stack.Reduction.callbacks`. + } + + include IdentifiableIntf.S with type t := t + } + + prods: array Prod.t + [@@doc "Array of productions, where each element's `index` field corresponds to the + element's array index."] + + Symbol = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness # empty ≡ token + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + symbols: array Symbol.t + [@@doc "Array of symbols, where each element's `index` field corresponds to the element's + array index."] + + Lr0Item = { + type t: t = { + prod: Prod.t + dot: uns + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Item = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Itemset = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + include IdentifiableIntf.S with type t := t + } + + Lr1ItemsetClosure = { + type t: t = { + index: uns # Index of corresponding `State.t` in `states` array. + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + include IdentifiableIntf.S with type t := t + } + + Action = { + type t: t = + | ShiftPrefix of uns # `states` index. + | ShiftAccept of uns # `states` index. + | Reduce of uns # `prods` index. + + include IdentifiableIntf.S with type t := t + } + + State = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `lr1ItemsetClosure.index` field + corresponds to the element's array index."] + } + + Token = { + «tokens» + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Nonterm = { + «nonterms» + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + State = { + type t: t = uns + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.State.t + } + + Stack = { + module Elm : sig + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t: t = Elm.t list + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + fmt >e: ?alt:bool -> ?width:uns -> t -> Fmt.Formatter e >e-> Fmt.Formatter e + + Reduction = { + type stack: stack = t + type t: t + type callback: callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + callbacks: array callback + [@@doc "Array of reduction callback functions containing embedded parser code."] + + callback: t -> callback + } + + shift: symbol:Symbol.t -> state:State.t -> t -> t + [@@doc "Perform a shift."] + + reduce: reduction:Reduction.t -> t -> t + [@@doc "Perform a reduction."] + } + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. + | Reject of Token.t # Syntax error due to unexpected token. + + include IdentifiableIntf.S with type t := t + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + «starts» + } + + feed: Token.t -> t -> t + [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, + `Reduce`, `Reject`}. `t.status` must be `Prefix`."] + + step: t -> t + [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must + be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."] + + next: Token.t -> t -> t + [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a + result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."] + }|} + +let expand_hmi_tokens symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_tokens formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; alias; qtype; _}-> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ) + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> formatter |> Fmt.fmt " # " |> String.fmt ~pretty:true alias + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t: t =\n" + |> fmt_tokens + +let expand_hmi_nonterms symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterms formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t: t =\n" + |> fmt_nonterms + +let expand_hmi_starts symbols ~indentation formatter = + let indent = mk_indent indentation in + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype={synthetic; _}; start; _} -> + (match start && (not synthetic) with + | false -> formatter, first + | true -> begin + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> String.fmt name |> Fmt.fmt " = {\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "boi: t\n" + ) + |> indent |> Fmt.fmt " }" + ), + false + end + ) + ) symbols + in + formatter + +let expand_hmi_template template_indentation template Spec.{symbols; _} formatter = + let expanders = Map.of_alist (module String) [ + ("«tokens»", expand_hmi_tokens symbols); + ("«nonterms»", expand_hmi_nonterms symbols); + ("«starts»", expand_hmi_starts symbols) + ] in + formatter |> expand ~template_indentation template expanders + +let generate_hmi conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec = + assert (Spec.conflicts spec = 0L); + let indentation = indentation_of_hocc hocc_ in + let module_name = module_name conf in + let hmhi_name = module_name ^ ".hmhi" in + let hmhi_path = Path.(join [Conf.srcdir conf; of_string hmhi_name] |> to_string_replace) in + let directive_pathstr = String.(hmhi_path |> to_string ~pretty:true) in + let io = + io.hmi + |> Fmt.fmt "# This file was generated by `hocc` based on " + |> Fmt.fmt (String.to_string ~pretty:true hmhi_name) + |> Fmt.fmt "\n" + |> Fmt.fmt "[:" |> Fmt.fmt directive_pathstr |> Fmt.fmt ":1]" + |> (fun formatter -> + match prelude with + | Parse.Matter {token_; _} -> begin + let base = match token_ with + | HmcToken {source; _} -> Hmc.Source.Slice.base source + | HoccToken _ -> not_reached () + in + let past = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.base source + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Fmt.fmt "[:]" + |> expand_hmi_template indentation hmi_template spec + |> (fun formatter -> + match postlude with + | Parse.Matter _ -> begin + let base = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.past source + in + let past = match eoi with + | HmcToken {source; _} -> Hmc.Source.Slice.past source + | HoccToken _ -> not_reached () + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter + |> fmt_source_directive indentation source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Io.with_hmi io + in + io + +let hm_template = {|{ + Spec = { + Algorithm = { + T = { + type t: t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + index = function + | Lr1 -> 0 + | Ielr1 -> 1 + | Pgm1 -> 2 + | Lalr1 -> 3 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + «algorithm» + + Assoc = { + T = { + type t: t = + | Left + | Right + + index = function + | Left -> 0 + | Right -> 1 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Left -> "Left" + | Right -> "Right" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + Prec = { + T = { + type t: t = { + index: uns + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness + } + + index {index; _} = + index + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Assoc.pp^)=(^assoc + ^); %f(^Ordset.pp^)=(^doms + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + } + + «precs» + + Prod = { + T = { + type t: t = { + index: uns + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %u=(^lhs_index + ^); %f(^Array.pp Uns.pp^)=(^rhs_indexes + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %u=(^callback + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + } + + «prods» + + Symbol = { + T = { + type t: t = { + index: uns + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %f(^Option.pp String.pp^)=(^alias + ^); %b=(^start + ^); %f(^Ordset.pp^)=(^prods + ^); %f(^Ordset.pp^)=(^first + ^); %f(^Ordset.pp^)=(^follow + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + } + + «symbols» + + Lr0Item = { + T = { + type t: t = { + prod: Prod.t + dot: uns + } + + hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + pp {prod; dot} formatter = + formatter |> Fmt.fmt "{%f(^Prod.pp^)=(^prod^); %u=(^dot^)}" + } + include T + include Identifiable.Make(T) + + init ~prod ~dot = + {prod; dot} + } + + Lr1Item = { + T = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{%f(^Lr0Item.pp^)=(^lr0item^); %f(^Ordset.pp^)=(^follow^)}" + } + include T + include Identifiable.Make(T) + + init ~lr0item ~follow = + {lr0item; follow} + } + + Lr1Itemset = { + T = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + hash_fold = Ordmap.hash_fold Lr1Item.hash_fold + cmp = Ordmap.cmp Lr1Item.cmp + pp = Ordmap.pp Lr1Item.pp + } + include T + include Identifiable.Make(T) + + empty = Ordmap.empty Lr0Item + + init = Ordmap.of_alist Lr0Item + } + + Lr1ItemsetClosure = { + T = { + type t: t = { + index: uns + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + hash_fold {index; _} state = + state |> Uns.hash_fold index + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %f(^Lr1Itemset.pp^)=(^kernel + ^); %f(^Lr1Itemset.pp^)=(^added + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~kernel ~added = + {index; kernel; added} + } + + Action = { + T = { + type t: t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + + arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + to_string = function + | ShiftPrefix state_index -> "ShiftPrefix %u(^state_index^)" + | ShiftAccept state_index -> "ShiftAccept %u(^state_index^)" + | Reduce prod_index -> "Reduce %u(^prod_index^)" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt + "{%f(^Lr1ItemsetClosure.pp^)=(^lr1ItemsetClosure + ^); %f(^Map.pp Action.pp^)=(^actions + ^); %f(^Map.pp Uns.pp^)=(^gotos + ^)}" + } + include T + include Identifiable.Make(T) + + init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + } + + «states» + } + + Token = { + T = { + «tokens» + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Nonterm = { + T = { + «nonterms» + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Symbol = { + T = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = uns + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + spec t = + Array.get t Spec.states + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + init state_index = + state_index + } + + Stack = { + Elm = { + T = { + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter |> Fmt.fmt "{%f(^Symbol.pp^)=(^symbol^); %f(^State.pp^)=(^state^)}" + } + include T + include Identifiable.Make(T) + + init ~symbol ~state = + {symbol; state} + } + + type t: t = list Elm.t + + fmt ?(alt=false) ?(width=0) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + pp t formatter = + formatter |> fmt t + + Reduction = { + T = { + type stack: stack = t + type t: t = uns + type callback: callback = stack -> Symbol.t * stack + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + «callbacks» + + callback t = + Array.get t callbacks + + init callback_index = + callback_index + } + + shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + # goto: Symbol.t -> t -> t + goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol + let Spec.State.{gotos; _} = Array.get state Spec.states + let state' = Map.get_hlt symbol_index gotos |> State.init + shift ~symbol ~state:state' t + + reduce ~reduction t = + let callback = Reduction.callback reduction + let symbol, t' = callback t + goto symbol t' + } + + Status = { + T = { + type t: t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + | Prefix -> 3 + | Accept _ -> 4 + | Reject _ -> 5 + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> fn hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + + let cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Reduce (token0, reduction0), Reduce (token1, reduction1) -> + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + | Gt -> Gt + + pp t formatter = + formatter + |> fn formatter -> + match t with + | ShiftPrefix (token, state) -> + formatter + |> Fmt.fmt "ShiftPrefix (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | ShiftAccept (token, state) -> + formatter + |> Fmt.fmt "ShiftAccept (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | Reduce reduction -> + formatter + |> Fmt.fmt "Reduce (%f(^Token.pp^)(^token^), %f(^Stack.Reduction.pp + ^)(^reduction^))" + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept %f(^Nonterm.pp^)(^nonterm^)" + | Reject token -> formatter |> Fmt.fmt "Reject %f(^Token.pp^)(^token^)" + } + include T + include Identifiable.Make(T) + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + «starts» + } + + feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> + let token_index = Token.index token + let Spec.State.{actions; _} = Array.get state Spec.states + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + Status.Reduce (token, reduction) + | None -> Status.Reject token + {t with status} + | _ -> not_reached () + + step {stack; status} = + let open Status + match status with + | ShiftPrefix (token, state) -> {stack=shift token state stack; status=Prefix} + | ShiftAccept (token, state) -> + # Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. + let stack = shift token state stack + let pseudo_end_index = Token.index Token.PSEUDO_END + let Spec.State.{actions; _} = Array.get state Spec.states + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + let stack = Stack.reduce ~reduction stack + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + | _ -> not_reached () + | Reduce (token, reduction) -> + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + | _ -> not_reached () + + # walk: t -> t + rec walk ({status; _} as t) = + let open Status + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + }|} + +let state_of_synthetic_start_symbol symbols states synthetic_start_symbol = + assert (Symbol.is_synthetic synthetic_start_symbol); + assert (synthetic_start_symbol.start); + Array.find ~f:(fun state -> + match State.is_start state with + | false -> false + | true -> begin + let start_symbol_index = State.start_symbol_index state in + let start_symbol = Symbols.symbol_of_symbol_index start_symbol_index symbols in + Symbol.(start_symbol = synthetic_start_symbol) + end + ) states + |> Option.value_hlt + +let expand_hm_algorithm algorithm ~indentation formatter = + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "algorithm = Algorithm." |> Conf.pp_algorithm algorithm + +let expand_hm_precs precs ~indentation formatter = + let fmt_precs ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Precs.fold ~init:(formatter, true) + ~f:(fun (formatter, first) Prec.{index; name; assoc; doms; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Prec.init" + |> Fmt.fmt " ~index:" |> Prod.Index.pp index + |> Fmt.fmt " ~name:" |> String.pp name + |> Fmt.fmt " ~assoc:" + |> (fun formatter -> + match assoc with + | None -> formatter |> Fmt.fmt "None" + | Some assoc -> formatter |> Fmt.fmt "(Some " |> Assoc.pp assoc |> Fmt.fmt ")" + ) + |> Fmt.fmt " ~doms:(Ordset." + |> (fun formatter -> + match Ordset.length doms with + | 0L -> formatter |> Fmt.fmt "empty Uns" + | 1L -> + formatter |> Fmt.fmt "singleton Uns " |> (Ordset.choose_hlt doms |> Prec.Index.pp) + | _ -> begin + formatter + |> Fmt.fmt "of_list Uns " + |> (Ordset.to_list doms |> List.pp Prec.Index.pp) + end + ) + |> Fmt.fmt ")" + ), + false + ) precs + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "precs = [|\n" + |> fmt_precs ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_hm_prods prods ~indentation formatter = + let fmt_prods ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Prods.fold ~init:(formatter, true) + ~f:(fun (formatter, first) Prod.{index; lhs_index; rhs_indexes; prec; callback; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Prod.init" + |> Fmt.fmt " ~index:" |> Prod.Index.pp index + |> Fmt.fmt " ~lhs_index:" |> Symbol.Index.pp lhs_index + |> Fmt.fmt " ~rhs_indexes:" |> Array.pp Symbol.Index.pp rhs_indexes + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prec:" + |> (fun formatter -> + match prec with + | None -> formatter |> Fmt.fmt "None" + | Some prec -> begin + formatter + |> Fmt.fmt "(Some (Array.get " |> Prec.Index.pp prec.index |> Fmt.fmt " precs))" + end + ) + |> Fmt.fmt " ~callback:" |> Callback.Index.pp callback.index + ), + false + ) prods + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "prods = [|\n" + |> fmt_prods ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_hm_symbols symbols ~indentation formatter = + let fmt_symbols ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first_line = Symbols.symbols_fold ~init:(formatter, true) + ~f:(fun (formatter, first_line) + Symbol.{index; name; prec; alias; start; prods; first; follow; _} -> + formatter + |> (fun formatter -> + match first_line with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Symbol.init" + |> Fmt.fmt " ~index:" |> Symbol.Index.pp index + |> Fmt.fmt " ~name:" |> String.pp name + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prec:" + |> (fun formatter -> + match prec with + | None -> formatter |> Fmt.fmt "None" + | Some prec -> begin + formatter + |> Fmt.fmt "(Some (Array.get " |> Prec.Index.pp prec.index |> Fmt.fmt " precs))" + end + ) + |> Fmt.fmt " ~alias:" + |> (fun formatter -> + match alias with + | None -> formatter |> Fmt.fmt "None" + | Some alias -> formatter |> Fmt.fmt "(Some " |> String.pp alias |> Fmt.fmt ")" + ) + |> Fmt.fmt " ~start:" |> Bool.pp start + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prods:(" + |> (fun formatter -> + match Ordset.length prods with + | 0L -> formatter |> Fmt.fmt "Ordset.empty Prod" + | 1L -> begin + let Prod.{index; _} = Ordset.choose_hlt prods in + formatter + |> Fmt.fmt "Ordset.singleton Prod (Array.get " + |> Prod.Index.pp index + |> Fmt.fmt " prods)" + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list Prod " + |> List.fmt ~alt:true ~width:indentation (fun Prod.{index; _} formatter -> + formatter + |> Fmt.fmt "Array.get " |> Prod.Index.pp index |> Fmt.fmt " prods" + ) (Ordset.to_list prods) + end + ) + |> Fmt.fmt ")" + |> Fmt.fmt " ~first:(" + |> (fun formatter -> + match Ordset.length first with + | 0L -> formatter |> Fmt.fmt "Ordset.empty Uns" + | 1L -> begin + let symbol_index = Ordset.choose_hlt first in + formatter |> Fmt.fmt "Ordset.singleton Uns " |> Prod.Index.pp symbol_index + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list Uns " + |> List.fmt Symbol.Index.pp (Ordset.to_list first) + end + ) + |> Fmt.fmt ")" + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~follow:(" + |> (fun formatter -> + match Ordset.length follow with + | 0L -> formatter |> Fmt.fmt "Ordset.empty Uns" + | 1L -> begin + let symbol_index = Ordset.choose_hlt follow in + formatter |> Fmt.fmt "Ordset.singleton Uns " |> Prod.Index.pp symbol_index + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list Uns " + |> List.pp Symbol.Index.pp (Ordset.to_list follow) + end + ) + |> Fmt.fmt ")" + ), + false + ) symbols + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "symbols = [|\n" + |> fmt_symbols ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_hm_lr1Itemset lr1itemset ~indentation formatter = + let indent = mk_indent indentation in + match Lr1Itemset.is_empty lr1itemset with + | false -> begin + formatter + |> indent |> Fmt.fmt "Lr1Itemset.init [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Lr1Itemset.fold ~init:formatter + ~f:(fun formatter {lr0item={prod={index=prod_index; _}; dot}; follow} -> + formatter + |> indent |> Fmt.fmt "(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let lr0item = Lr0Item.init ~prod:" + |> Fmt.fmt "(Array.get " |> Prod.Index.pp prod_index |> Fmt.fmt " prods)" + |> Fmt.fmt " ~dot:" |> Uns.pp dot |> Fmt.fmt "\n" + |> indent |> Fmt.fmt "let lr1item = Lr1Item.init ~lr0item ~follow:\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Ordset.of_list Uns " |> Ordset.pp follow |> Fmt.fmt "\n" + ) + |> indent |> Fmt.fmt "lr0item, lr1item\n" + ) + |> indent |> Fmt.fmt " )\n" + ) lr1itemset + ) + |> indent |> Fmt.fmt " ]\n" + end + | true -> formatter |> indent |> Fmt.fmt "Lr1Itemset.empty\n" + +let expand_hm_states states ~indentation formatter = + let fmt_states ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Array.fold ~init:(formatter, true) + ~f:(fun (formatter, first) + State.{statenub={lr1itemsetclosure={index; kernel; added}; _}; actions; gotos} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "(* " |> Lr1ItemsetClosure.Index.pp index + |> Fmt.fmt " *) State.init\n" + |> indent |> Fmt.fmt " ~lr1ItemsetClosure:\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Lr1ItemsetClosure.init\n" + |> indent |> Fmt.fmt " ~index:" + |> Lr1ItemsetClosure.Index.pp index |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " ~kernel:\n" + |> expand_hm_lr1Itemset kernel ~indentation:(indentation+4L) + |> indent |> Fmt.fmt " ~added:\n" + |> expand_hm_lr1Itemset added ~indentation:(indentation+4L) + ) + |> indent |> Fmt.fmt " ~actions:\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Map.of_alist Uns [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, action_set) -> + assert (Ordset.length action_set = 1L); + let action = Ordset.choose_hlt action_set in + formatter + |> indent + |> Symbol.Index.pp symbol_index + |> Fmt.fmt ", Action." + |> State.Action.pp action + |> Fmt.fmt "\n" + ) actions + ) + |> indent |> Fmt.fmt " ]\n" + ) + |> indent |> Fmt.fmt " ~gotos:\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + match Ordmap.is_empty gotos with + | false -> begin + formatter + |> indent |> Fmt.fmt "Map.of_alist Uns [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, state_index) -> + formatter + |> indent + |> Symbol.Index.pp symbol_index + |> Fmt.fmt ", " + |> State.Index.pp state_index + |> Fmt.fmt "\n" + ) gotos + ) + |> indent |> Fmt.fmt " ]" + end + | true -> formatter |> indent |> Fmt.fmt "Map.empty Uns" + ) + ), + false + ) states + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "states = [|\n" + |> fmt_states ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_hm_token_type symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_tokens formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; alias; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ) + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> formatter |> Fmt.fmt " # " |> String.fmt ~pretty:true alias + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t: t =\n" + |> fmt_tokens + +let expand_hm_token_index symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_token_indexes formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {index; name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> (fun formatter -> + match qtype.explicit_opt with + | None -> formatter + | Some _ -> formatter |> Fmt.fmt " _" + ) + |> Fmt.fmt " -> " + |> Uns.pp index + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "index = function\n" + |> fmt_token_indexes + +let expand_hm_tokens symbols ~indentation formatter = + formatter + |> expand_hm_token_type symbols ~indentation |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> expand_hm_token_index symbols ~indentation + +let expand_hm_nonterm_type symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterms formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t: t =\n" + |> fmt_nonterms + +let expand_hm_nonterm_index symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterm_indexes formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {index; name; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " _ -> " + |> Uns.pp index + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "index = function\n" + |> fmt_nonterm_indexes + +let expand_hm_nonterms symbols ~indentation formatter = + formatter + |> expand_hm_nonterm_type symbols ~indentation |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> expand_hm_nonterm_index symbols ~indentation + +let expand_hm_callbacks hocc_block symbols callbacks ~indentation formatter = + let fmt_callbacks ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Callbacks.fold ~init:(formatter, true) + ~f:(fun (formatter, first) (Callback.{index; lhs_name; rhs; code; _} as callback) -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "(* " |> Callback.Index.pp index + |> Fmt.fmt " *) " + |> (fun formatter -> + match Option.is_empty code with + | false -> begin + let underline = Codepoint.of_char '_' in + let overline = Codepoint.kv 0x203eL (*'‾'*) in + let code = Option.value_hlt code in + let source = Parse.source_of_code hocc_block code in + formatter + |> Fmt.fmt "function\n" + |> (fun formatter -> + let formatter, _first = + Callback.Params.fold_right ~init:(formatter, true) + ~f:(fun (formatter, first) + Callback.Param.{binding; symbol_name; _} -> + let is_token = + Symbols.symbol_of_name symbol_name symbols + |> Option.value_hlt + |> Symbol.is_token + in + let symbol_constructor = match is_token with + | true -> "Token" + | false -> "Nonterm" + in + formatter + |> indent + |> Fmt.fmt (match first with + | true -> " | " + | false -> " :: " + ) + |> (fun formatter -> + match binding with + | Some uname -> begin + formatter + |> Fmt.fmt "{symbol=Symbol." + |> Fmt.fmt symbol_constructor + |> Fmt.fmt " (" + |> Fmt.fmt symbol_name + |> Fmt.fmt " " + |> Fmt.fmt uname + |> Fmt.fmt "); _}" + end + | None -> formatter |> Fmt.fmt "_" + ) + |> Fmt.fmt "\n" + , false + ) rhs + in + formatter + ) + |> indent |> Fmt.fmt " " + |> Fmt.fmt (match Callback.is_epsilon callback with false -> ":: " | true -> "") + |> Fmt.fmt "tl -> Symbol.Nonterm (" + |> Fmt.fmt lhs_name |> Fmt.fmt " (\n" + |> indent + |> String.fmt ~pad:underline ~just:Fmt.Left ~width:(100L - indentation) " # " + |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " " + |> fmt_source_directive (Parse.indentation_of_code hocc_block code) source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + |> Fmt.fmt "[:]\n" + |> indent + |> String.fmt ~pad:overline ~just:Fmt.Left ~width:(100L - indentation) " # " + |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " )), tl" + |> (fun formatter -> + match Callback.is_epsilon callback with + | false -> + formatter |> Fmt.fmt "\n" |> indent |> Fmt.fmt " | _ -> not_reached ()" + | true -> formatter + ) + end + | true -> formatter |> Fmt.fmt "fn _stack -> not_reached ()" + ) + ), + false + ) callbacks + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "callbacks = [|\n" + |> fmt_callbacks ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_hm_starts symbols states ~indentation formatter = + let indent = mk_indent indentation in + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype={synthetic; _}; start; _} -> + match (start && (not synthetic)) with + | false -> formatter, first + | true -> begin + let synthetic_name = Spec.synthetic_name_of_start_name name in + let synthetic_start_symbol = + Symbols.symbol_of_name synthetic_name symbols |> Option.value_hlt in + let state = + state_of_synthetic_start_symbol symbols states synthetic_start_symbol in + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> String.fmt name |> Fmt.fmt " = {\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "boi = {\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "stack=[{\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "symbol=Token Token.EPSILON\n" + |> indent |> Fmt.fmt "state=State.init " |> State.(index state |> Index.pp) + |> Fmt.fmt "\n" + ) + |> indent |> Fmt.fmt " }]\n" + |> indent |> Fmt.fmt "status=Prefix\n" + ) + |> indent |> Fmt.fmt " }\n" + ) + |> indent |> Fmt.fmt " }" + ), + false + end + ) symbols + in + formatter + +let expand_hm_template template_indentation template hocc_block + Spec.{algorithm; precs; symbols; prods; callbacks; states} formatter = + let expanders = Map.of_alist (module String) [ + ("«algorithm»", expand_hm_algorithm algorithm); + ("«precs»", expand_hm_precs precs); + ("«prods»", expand_hm_prods prods); + ("«symbols»", expand_hm_symbols symbols); + ("«states»", expand_hm_states states); + ("«tokens»", expand_hm_tokens symbols); + ("«nonterms»", expand_hm_nonterms symbols); + ("«callbacks»", expand_hm_callbacks hocc_block symbols callbacks); + ("«starts»", expand_hm_starts symbols states) + ] in + formatter |> expand ~template_indentation template expanders + +let generate_hm conf + Parse.(Hmh {prelude; hocc_=(Hocc {hocc_; _} as hocc_block); postlude; eoi}) io spec = + assert (Spec.conflicts spec = 0L); + let indentation = indentation_of_hocc hocc_ in + let module_name = module_name conf in + let hmh_name = module_name ^ ".hmh" in + let hmh_path = Path.(join [Conf.srcdir conf; of_string hmh_name] |> to_string_replace) in + let directive_pathstr = String.(hmh_path |> to_string ~pretty:true) in + let io = + io.hm + |> Fmt.fmt "# This file was generated by `hocc` based on " + |> Fmt.fmt (String.to_string ~pretty:true hmh_name) + |> Fmt.fmt "\n" + |> Fmt.fmt "[:" |> Fmt.fmt directive_pathstr |> Fmt.fmt ":1]" + |> (fun formatter -> + match prelude with + | Parse.Matter {token_; _} -> begin + let base = match token_ with + | HmcToken {source; _} -> Hmc.Source.Slice.base source + | HoccToken _ -> not_reached () + in + let past = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.base source + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Fmt.fmt "[:]" + |> expand_hm_template indentation hm_template hocc_block spec + |> (fun formatter -> + match postlude with + | Parse.Matter _ -> begin + let base = Parse.postlude_base_of_hocc hocc_block in + let past = match eoi with + | HmcToken {source; _} -> Hmc.Source.Slice.past source + | HoccToken _ -> not_reached () + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter + |> fmt_source_directive indentation source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Io.with_hm io + in + io + +(*************************************************************************************************** + * OCaml code generation. *) + +(* Source directives thwart debugging generated code. This is only an issue for `hocc` development, + * hence this unexposed knob. *) +let ml_source_comments = false + +let fmt_ml_source_directive source formatter = + (* The line directive (both its value and whether to output a terminating '\n') needs to be + * adjusted according to whether the `hocc` keyword is immediately followed by a newline. *) + let nl_termination = match (Hmc.Source.Slice.base source |> Hmc.Source.Cursor.next_opt) with + | None -> true + | Some (cp, _) -> Codepoint.(cp <> of_char '\n') + in + let directive_pathstr = + Hmc.Source.Slice.container source + |> Hmc.Source.path + |> Option.value_hlt + |> Path.to_string_hlt + in + let base = Hmc.Source.Slice.base source in + let pos = Hmc.Source.Cursor.pos base in + let line = Text.Pos.line pos in + formatter + |> Fmt.fmt "\n" + |> Fmt.fmt (match ml_source_comments with false -> "" | true -> "(* ") + |> Fmt.fmt "#" |> Uns.fmt (line + (Bool.to_uns (not nl_termination))) + |> Fmt.fmt " " |> String.pp directive_pathstr + |> Fmt.fmt (match ml_source_comments with false -> "" | true -> " *)") + |> (fun formatter -> match nl_termination with + | false -> formatter + | true -> formatter |> Fmt.fmt "\n" + ) + +let ml_uns_pp u formatter = + formatter + |> Uns.fmt ~alt:false u + |> Fmt.fmt "L" + +let mli_template = {|sig + module Spec : sig + module Algorithm : sig + type t = + | Lr1 (** LR(1) algorithm. *) + | Ielr1 (** IELR(1) algorithm. *) + | Pgm1 (** PGM(1) algorithm. *) + | Lalr1 (** LALR(1) algorithm. *) + + include IdentifiableIntf.S with type t := t + end + + val algorithm: Algorithm.t + (** Algorithm used to generate parser. *) + + module Assoc : sig + type t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + end + + module Prec : sig + type t = { + index: uns; (* Index in `precs` array. *) + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; (* Indices in `precs` array of dominator + * precedences. *) + } + + include IdentifiableIntf.S with type t := t + end + + val precs: Prec.t array + (** Array of precedences, where each element's `index` field corresponds to the element's + array index. *) + + module Prod : sig + type t = { + index: uns; (* Index in `prods` array. *) + lhs_index: uns; + rhs_indexes: uns array; + prec: Prec.t option; + callback: uns; (* Index of reduction callback in `Stack.Reduction.callbacks`. *) + } + + include IdentifiableIntf.S with type t := t + end + + val prods: Prod.t array + (** Array of productions, where each element's `index` field corresponds to the element's + array index. *) + + module Symbol : sig + type t = { + index: uns; (* Index in `symbols` array. *) + name: string; + prec: Prec.t option; + alias: string option; + start: bool; + prods: (Prod.t, Prod.cmper_witness) Ordset.t; (* empty ≡ token *) + first: (uns, Uns.cmper_witness) Ordset.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + include IdentifiableIntf.S with type t := t + end + + val symbols: Symbol.t array + (** Array of symbols, where each element's `index` field corresponds to the element's + array index. *) + + module Lr0Item : sig + type t = { + prod: Prod.t; + dot: uns; + } + + include IdentifiableIntf.S with type t := t + end + + module Lr1Item : sig + type t = { + lr0item: Lr0Item.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + include IdentifiableIntf.S with type t := t + end + + module Lr1Itemset : sig + type t = (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.t + + include IdentifiableIntf.S with type t := t + end + + module Lr1ItemsetClosure : sig + type t = { + index: uns; (* Index of corresponding `State.t` in `states` array. *) + kernel: Lr1Itemset.t; + added: Lr1Itemset.t; + } + + include IdentifiableIntf.S with type t := t + end + + module Action : sig + type t = + | ShiftPrefix of uns (* `states` index. *) + | ShiftAccept of uns (* `states` index. *) + | Reduce of uns (* `prods` index. *) + + include IdentifiableIntf.S with type t := t + end + + module State : sig + type t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t; + actions: (uns, Action.t, Uns.cmper_witness) Map.t; + gotos: (uns, uns, Uns.cmper_witness) Map.t; + } + + include IdentifiableIntf.S with type t := t + end + + val states: State.t array + (** Array of CFSM states, where each element's `lr1ItemsetClosure.index` field corresponds + to the element's array index. *) + end + + module Token : sig + «tokens» + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module Nonterm : sig + «nonterms» + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module Symbol : sig + type t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module State : sig + type t = uns + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.State.t + end + + module Stack : sig + module Elm : sig + type t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t = Elm.t list + + val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + val fmt: ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + module Reduction : sig + type stack = t + type t + type callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + val callbacks: callback array + (** Array of reduction callback functions containing embedded parser code. *) + + val callback: t -> callback + end + + val shift: symbol:Symbol.t -> state:State.t -> t -> t + (** Perform a shift. *) + + val reduce: reduction:Reduction.t -> t -> t + (** Perform a reduction. *) + end + + module Status : sig + type t = + (* `feed`/`step` may produce these variants; `next` fast-forwards over them. *) + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + (* Common variants. *) + | Prefix (** Valid parse prefix; more input needed. *) + | Accept of Nonterm.t (** Successful parse result. *) + | Reject of Token.t (** Syntax error due to unexpected token. *) + + include IdentifiableIntf.S with type t := t + end + + type t = { + stack: Stack.t; + status: Status.t; + } + + module Start : sig + «starts» + end + + val feed: Token.t -> t -> t + (** `feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, `Reduce`, + `Reject`}. `t.status` must be `Prefix`. *) + + val step: t -> t + (** `step t` returns the result of applying one state transition to `t`. `t.status` must be in + {`ShiftPrefix`, `ShiftAccept`, `Reduce`}. *) + + val next: Token.t -> t -> t + (** `next token t` calls `feed token t` and fast-forwards via `step` calls to return a result + with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`. *) + end|} + +let expand_mli_tokens symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_tokens formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; alias; qtype; _}-> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ) + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> + formatter |> Fmt.fmt " (* " |> String.fmt ~pretty:true alias |> Fmt.fmt " *)" + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t =\n" + |> fmt_tokens + +let expand_mli_nonterms symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterms formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t =\n" + |> fmt_nonterms + +let expand_mli_starts symbols ~indentation formatter = + let indent = mk_indent indentation in + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype={synthetic; _}; start; _} -> + (match start && (not synthetic) with + | false -> formatter, first + | true -> begin + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "module " |> String.fmt name |> Fmt.fmt " : sig\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "val boi: t\n" + ) + |> indent |> Fmt.fmt " end" + ), + false + end + ) + ) symbols + in + formatter + +let expand_mli_template template_indentation template Spec.{symbols; _} formatter = + let expanders = Map.of_alist (module String) [ + ("«tokens»", expand_mli_tokens symbols); + ("«nonterms»", expand_mli_nonterms symbols); + ("«starts»", expand_mli_starts symbols) + ] in + formatter |> expand ~template_indentation template expanders + +let generate_mli conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec = + assert (Spec.conflicts spec = 0L); + let indentation = indentation_of_hocc hocc_ in + let module_name = module_name conf in + let hmhi_name = module_name ^ ".hmhi" in + let io = + io.mli + |> Fmt.fmt "(* This file was generated by `hocc` based on " + |> Fmt.fmt (String.to_string ~pretty:true hmhi_name) + |> Fmt.fmt " *)\n" + |> (fun formatter -> + match prelude with + | Parse.Matter {token_; _} -> begin + let base = match token_ with + | HmcToken {source; _} -> Hmc.Source.Slice.base source + | HoccToken _ -> not_reached () + in + let past = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.base source + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> expand_mli_template indentation mli_template spec + |> (fun formatter -> + match postlude with + | Parse.Matter _ -> begin + let base = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.past source + in + let past = match eoi with + | HmcToken {source; _} -> Hmc.Source.Slice.past source + | HoccToken _ -> not_reached () + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter + |> fmt_ml_source_directive source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Io.with_mli io + in + io + +let ml_template = {|struct + module Spec = struct + module Algorithm = struct + module T = struct + type t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + let index = function + | Lr1 -> 0L + | Ielr1 -> 1L + | Pgm1 -> 2L + | Lalr1 -> 3L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + «algorithm» + + module Assoc = struct + module T = struct + type t = + | Left + | Right + + let index = function + | Left -> 0L + | Right -> 1L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let to_string = function + | Left -> "Left" + | Right -> "Right" + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + module Prec = struct + module T = struct + type t = { + index: uns; + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; + } + + let index {index; _} = + index + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; assoc=" |> Option.pp Assoc.pp assoc + |> Fmt.fmt "; doms=" |> Ordset.pp doms + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + end + + «precs» + + module Prod = struct + module T = struct + type t = { + index: uns; + lhs_index: uns; + rhs_indexes: uns array; + prec: Prec.t option; + callback: uns; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; lhs_index=" |> Uns.pp lhs_index + |> Fmt.fmt "; rhs_indexes=" |> Array.pp Uns.pp rhs_indexes + |> Fmt.fmt "; prec=" |> Option.pp Prec.pp prec + |> Fmt.fmt "; callback=" |> Uns.pp callback + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + end + + «prods» + + module Symbol = struct + module T = struct + type t = { + index: uns; + name: string; + prec: Prec.t option; + alias: string option; + start: bool; + prods: (Prod.t, Prod.cmper_witness) Ordset.t; + first: (uns, Uns.cmper_witness) Ordset.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; prec=" |> Option.pp Prec.pp prec + |> Fmt.fmt "; alias=" |> Option.pp String.pp alias + |> Fmt.fmt "; start=" |> Bool.pp start + |> Fmt.fmt "; prods=" |> Ordset.pp prods + |> Fmt.fmt "; first=" |> Ordset.pp first + |> Fmt.fmt "; follow=" |> Ordset.pp follow + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + end + + «symbols» + + module Lr0Item = struct + module T = struct + type t = { + prod: Prod.t; + dot: uns; + } + + let hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + let cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp in + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + let pp {prod; dot} formatter = + formatter + |> Fmt.fmt "{prod=" |> Prod.pp prod + |> Fmt.fmt "; dot=" |> Uns.pp dot + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~prod ~dot = + {prod; dot} + end + + module Lr1Item = struct + module T = struct + type t = { + lr0item: Lr0Item.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + let hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + let cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp in + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + let pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{lr0item=" |> Lr0Item.pp lr0item + |> Fmt.fmt "; follow=" |> Ordset.pp follow + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~lr0item ~follow = + {lr0item; follow} + end + + module Lr1Itemset = struct + module T = struct + type t = (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.t + + let hash_fold t = + Ordmap.hash_fold Lr1Item.hash_fold t + + let cmp t0 t1 = + Ordmap.cmp Lr1Item.cmp t0 t1 + + let pp = Ordmap.pp Lr1Item.pp + end + include T + include Identifiable.Make(T) + + let empty = Ordmap.empty (module Lr0Item) + + let init = Ordmap.of_alist (module Lr0Item) + end + + module Lr1ItemsetClosure = struct + module T = struct + type t = { + index: uns; + kernel: Lr1Itemset.t; + added: Lr1Itemset.t; + } + + let hash_fold {index; _} state = + state |> Uns.hash_fold index + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; kernel=" |> Lr1Itemset.pp kernel + |> Fmt.fmt "; added=" |> Lr1Itemset.pp added + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~kernel ~added = + {index; kernel; added} + end + + module Action = struct + module T = struct + type t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + let constructor_index = function + | ShiftPrefix _ -> 0L + | ShiftAccept _ -> 1L + | Reduce _ -> 2L + + let arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + let cmp t0 t1 = + let open Cmp in + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + let to_string = function + | ShiftPrefix state_index -> begin + String.Fmt.empty + |> Fmt.fmt "ShiftPrefix " |> Uns.pp state_index + |> Fmt.to_string + end + | ShiftAccept state_index -> begin + String.Fmt.empty + |> Fmt.fmt "ShiftAccept " |> Uns.pp state_index + |> Fmt.to_string + end + | Reduce prod_index -> begin + String.Fmt.empty + |> Fmt.fmt "Reduce " |> Uns.pp prod_index + |> Fmt.to_string + end + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + module State = struct + module T = struct + type t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t; + actions: (uns, Action.t, Uns.cmper_witness) Map.t; + gotos: (uns, uns, Uns.cmper_witness) Map.t; + } + + let hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + let cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + let pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt "{lr1ItemsetClosure=" |> Lr1ItemsetClosure.pp lr1ItemsetClosure + |> Fmt.fmt "; actions=" |> Map.pp Action.pp actions + |> Fmt.fmt "; gotos=" |> Map.pp Uns.pp gotos + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + end + + «states» + end + + module Token = struct + module T = struct + «tokens» + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec t = + Array.get (index t) Spec.symbols + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module Nonterm = struct + module T = struct + «nonterms» + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec t = + Array.get (index t) Spec.symbols + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module Symbol = struct + module T = struct + type t = + | Token of Token.t + | Nonterm of Nonterm.t + + let index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module State = struct + module T = struct + type t = uns + + let hash_fold t state = + state |> Uns.hash_fold t + + let cmp t0 t1 = + Uns.cmp t0 t1 + + let spec t = + Array.get t Spec.states + + let pp t formatter = + formatter |> Uns.pp t + end + include T + include Identifiable.Make(T) + + let init state_index = + state_index + end + + module Stack = struct + module Elm = struct + module T = struct + type t = { + symbol: Symbol.t; + state: State.t; + } + + let hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + let cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp in + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter + |> Fmt.fmt "{symbol=" |> Symbol.pp symbol + |> Fmt.fmt "; state=" |> State.pp state + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~symbol ~state = + {symbol; state} + end + + type t = Elm.t list + + let fmt ?(alt=false) ?(width=0L) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + let pp t formatter = + formatter |> fmt t + + module Reduction = struct + module T = struct + type stack = t + type t = uns + type callback = stack -> Symbol.t * stack + + let hash_fold t state = + state |> Uns.hash_fold t + + let cmp t0 t1 = + Uns.cmp t0 t1 + + let pp t formatter = + formatter |> Uns.pp t + end + include T + include Identifiable.Make(T) + + «callbacks» + + let callback t = + Array.get t callbacks + + let init callback_index = + callback_index + end + + let shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + (* val goto: Symbol.t -> t -> t *) + let goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol in + let Spec.State.{gotos; _} = Array.get state Spec.states in + let state' = Map.get_hlt symbol_index gotos |> State.init in + shift ~symbol ~state:state' t + + let reduce ~reduction t = + let callback = Reduction.callback reduction in + let symbol, t' = callback t in + goto symbol t' + end + + module Status = struct + module T = struct + type t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0L + | ShiftAccept _ -> 1L + | Reduce _ -> 2L + | Prefix -> 3L + | Accept _ -> 4L + | Reject _ -> 5L + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> (fun hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + ) + + let cmp t0 t1 = + let open Cmp in + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> begin + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> begin + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + end + | Reduce (token0, reduction0), Reduce (token1, reduction1) + -> begin + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + end + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + end + | Gt -> Gt + + let pp t formatter = + formatter + |> (fun formatter -> + match t with + | ShiftPrefix (token, state) -> begin + formatter + |> Fmt.fmt "ShiftPrefix (" |> Token.pp token + |> Fmt.fmt ", " |> State.pp state + |> Fmt.fmt ")" + end + | ShiftAccept (token, state) -> begin + formatter + |> Fmt.fmt "ShiftAccept (" |> Token.pp token + |> Fmt.fmt ", " |> State.pp state + |> Fmt.fmt ")" + end + | Reduce (token, reduction) -> begin + formatter + |> Fmt.fmt "Reduce (" |> Token.pp token + |> Fmt.fmt ", " |> Stack.Reduction.pp reduction + |> Fmt.fmt ")" + end + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept " |> Nonterm.pp nonterm + | Reject token -> formatter |> Fmt.fmt "Reject " |> Token.pp token + ) + end + include T + include Identifiable.Make(T) + end + + type t = { + stack: Stack.t; + status: Status.t; + } + + module Start = struct + «starts» + end + + let feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> begin + let token_index = Token.index token in + let Spec.State.{actions; _} = Array.get state Spec.states in + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> begin + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods in + let reduction = Stack.Reduction.init callback_index in + Status.Reduce (token, reduction) + end + | None -> Status.Reject token + in + {t with status} + end + | _ -> not_reached () + + let step {stack; status} = + let open Status in + match status with + | ShiftPrefix (token, state) -> + {stack=Stack.shift ~symbol:(Token token) ~state stack; status=Prefix} + | ShiftAccept (token, state) -> begin + (* Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. *) + let stack = Stack.shift ~symbol:(Token token) ~state stack in + let pseudo_end_index = Token.index Token.PSEUDO_END in + let Spec.State.{actions; _} = Array.get state Spec.states in + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> begin + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods in + let reduction = Stack.Reduction.init callback_index in + let stack = Stack.reduce ~reduction stack in + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + end + | _ -> not_reached () + end + | Reduce (token, reduction) -> begin + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + end + | _ -> not_reached () + + (* val walk: t -> t *) + let rec walk ({status; _} as t) = + let open Status in + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + let next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + end|} + +let expand_ml_algorithm algorithm ~indentation formatter = + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let algorithm = Algorithm." |> Conf.pp_algorithm algorithm + +let expand_ml_precs precs ~indentation formatter = + let fmt_precs ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Precs.fold ~init:(formatter, true) + ~f:(fun (formatter, first) Prec.{index; name; assoc; doms; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt ";\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Prec.init" + |> Fmt.fmt " ~index:" |> ml_uns_pp index + |> Fmt.fmt " ~name:" |> String.pp name + |> Fmt.fmt " ~assoc:" + |> (fun formatter -> + match assoc with + | None -> formatter |> Fmt.fmt "None" + | Some assoc -> formatter |> Fmt.fmt "(Some " |> Assoc.pp assoc |> Fmt.fmt ")" + ) + |> Fmt.fmt " ~doms:(Ordset." + |> (fun formatter -> + match Ordset.length doms with + | 0L -> formatter |> Fmt.fmt "empty (module Uns)" + | 1L -> begin + formatter + |> Fmt.fmt "singleton (module Uns) " + |> (Ordset.choose_hlt doms |> ml_uns_pp) + end + | _ -> begin + formatter + |> Fmt.fmt "of_list (module Uns) " + |> (Ordset.to_list doms |> List.pp ml_uns_pp) + end + ) + |> Fmt.fmt ")" + ), + false + ) precs + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let precs = [|\n" + |> fmt_precs ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_ml_prods prods ~indentation formatter = + let fmt_prods ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Prods.fold ~init:(formatter, true) + ~f:(fun (formatter, first) Prod.{index; lhs_index; rhs_indexes; prec; callback; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt ";\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Prod.init" + |> Fmt.fmt " ~index:" |> ml_uns_pp index + |> Fmt.fmt " ~lhs_index:" |> ml_uns_pp lhs_index + |> Fmt.fmt " ~rhs_indexes:" |> Array.pp ml_uns_pp rhs_indexes + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prec:" + |> (fun formatter -> + match prec with + | None -> formatter |> Fmt.fmt "None" + | Some prec -> begin + formatter + |> Fmt.fmt "(Some (Array.get " |> ml_uns_pp prec.index |> Fmt.fmt " precs))" + end + ) + |> Fmt.fmt " ~callback:" |> ml_uns_pp callback.index + ), + false + ) prods + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let prods = [|\n" + |> fmt_prods ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_ml_symbols symbols ~indentation formatter = + let fmt_symbols ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first_line = Symbols.symbols_fold ~init:(formatter, true) + ~f:(fun (formatter, first_line) + Symbol.{index; name; prec; alias; start; prods; first; follow; _} -> + formatter + |> (fun formatter -> + match first_line with + | true -> formatter + | false -> formatter |> Fmt.fmt ";\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt "Symbol.init" + |> Fmt.fmt " ~index:" |> ml_uns_pp index + |> Fmt.fmt " ~name:" |> String.pp name + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prec:" + |> (fun formatter -> + match prec with + | None -> formatter |> Fmt.fmt "None" + | Some prec -> begin + formatter + |> Fmt.fmt "(Some (Array.get " |> ml_uns_pp prec.index |> Fmt.fmt " precs))" + end + ) + |> Fmt.fmt " ~alias:" + |> (fun formatter -> + match alias with + | None -> formatter |> Fmt.fmt "None" + | Some alias -> formatter |> Fmt.fmt "(Some " |> String.pp alias |> Fmt.fmt ")" + ) + |> Fmt.fmt " ~start:" |> Bool.pp start + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~prods:(" + |> (fun formatter -> + match Ordset.length prods with + | 0L -> formatter |> Fmt.fmt "Ordset.empty (module Prod)" + | 1L -> begin + let Prod.{index; _} = Ordset.choose_hlt prods in + formatter + |> Fmt.fmt "Ordset.singleton (module Prod) (Array.get " + |> ml_uns_pp index + |> Fmt.fmt " prods)" + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list (module Prod) " + |> List.fmt ~alt:true ~width:indentation (fun Prod.{index; _} formatter -> + formatter + |> Fmt.fmt "Array.get " |> ml_uns_pp index |> Fmt.fmt " prods;" + ) (Ordset.to_list prods) + end + ) + |> Fmt.fmt ")" + |> Fmt.fmt " ~first:(" + |> (fun formatter -> + match Ordset.length first with + | 0L -> formatter |> Fmt.fmt "Ordset.empty (module Uns)" + | 1L -> begin + let symbol_index = Ordset.choose_hlt first in + formatter + |> Fmt.fmt "Ordset.singleton (module Uns) " |> ml_uns_pp symbol_index + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list (module Uns) " + |> List.fmt ml_uns_pp (Ordset.to_list first) + end + ) + |> Fmt.fmt ")" + |> Fmt.fmt "\n" |> indent |> Fmt.fmt " ~follow:(" + |> (fun formatter -> + match Ordset.length follow with + | 0L -> formatter |> Fmt.fmt "Ordset.empty (module Uns)" + | 1L -> begin + let symbol_index = Ordset.choose_hlt follow in + formatter |> Fmt.fmt "Ordset.singleton (module Uns) " |> ml_uns_pp symbol_index + end + | _ -> begin + formatter + |> Fmt.fmt "Ordset.of_list (module Uns) " + |> List.pp ml_uns_pp (Ordset.to_list follow) + end + ) + |> Fmt.fmt ")" + ), + false + ) symbols + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let symbols = [|\n" + |> fmt_symbols ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_ml_lr1Itemset lr1itemset ~indentation formatter = + let indent = mk_indent indentation in + match Lr1Itemset.is_empty lr1itemset with + | false -> begin + formatter + |> indent |> Fmt.fmt "Lr1Itemset.init [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Lr1Itemset.fold ~init:formatter + ~f:(fun formatter {lr0item={prod={index=prod_index; _}; dot}; follow} -> + formatter + |> indent |> Fmt.fmt "(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let lr0item = Lr0Item.init ~prod:" + |> Fmt.fmt "(Array.get " |> ml_uns_pp prod_index |> Fmt.fmt " prods)" + |> Fmt.fmt " ~dot:" |> ml_uns_pp dot |> Fmt.fmt " in\n" + |> indent |> Fmt.fmt "let lr1item = Lr1Item.init ~lr0item ~follow:(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Ordset.of_list (module Uns) " + |> (Ordset.to_list follow |> List.pp ml_uns_pp) + |> Fmt.fmt "\n" + ) + |> indent |> Fmt.fmt " ) in\n" + |> indent |> Fmt.fmt "lr0item, lr1item\n" + ) + |> indent |> Fmt.fmt " );\n" + ) lr1itemset + ) + |> indent |> Fmt.fmt " ]\n" + end + | true -> formatter |> indent |> Fmt.fmt "Lr1Itemset.empty\n" + +let expand_ml_states states ~indentation formatter = + let fmt_states ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Array.fold ~init:(formatter, true) + ~f:(fun (formatter, first) + State.{statenub={lr1itemsetclosure={index; kernel; added}; _}; actions; gotos} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "(* " |> State.Index.pp index + |> Fmt.fmt " *) State.init\n" + |> indent |> Fmt.fmt " ~lr1ItemsetClosure:(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Lr1ItemsetClosure.init\n" + |> indent |> Fmt.fmt " ~index:" + |> ml_uns_pp index |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " ~kernel:(\n" + |> expand_ml_lr1Itemset kernel ~indentation:(indentation+4L) + |> indent |> Fmt.fmt " )\n" + |> indent |> Fmt.fmt " ~added:(\n" + |> expand_ml_lr1Itemset added ~indentation:(indentation+4L) + |> indent |> Fmt.fmt " )\n" + ) + |> indent |> Fmt.fmt " )\n" + |> indent |> Fmt.fmt " ~actions:(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "Map.of_alist (module Uns) [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, action_set) -> + assert (Ordset.length action_set = 1L); + let action = Ordset.choose_hlt action_set in + formatter + |> indent + |> Fmt.fmt "(" + |> ml_uns_pp symbol_index + |> Fmt.fmt ", Action." + |> State.Action.pp action + |> Fmt.fmt "L);\n" + ) actions + ) + |> indent |> Fmt.fmt " ]\n" + ) + |> indent |> Fmt.fmt " )\n" + |> indent |> Fmt.fmt " ~gotos:(\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + match Ordmap.is_empty gotos with + | false -> begin + formatter + |> indent |> Fmt.fmt "Map.of_alist (module Uns) [\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, state_index) -> + formatter + |> indent + |> Fmt.fmt "(" + |> ml_uns_pp symbol_index + |> Fmt.fmt ", " + |> ml_uns_pp state_index + |> Fmt.fmt ");\n" + ) gotos + ) + |> indent |> Fmt.fmt " ]" + end + | true -> formatter |> indent |> Fmt.fmt "Map.empty (module Uns)" + ) + |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " );" + ), + false + ) states + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let states = [|\n" + |> fmt_states ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_ml_token_type symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_tokens formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; alias; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ) + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> + formatter |> Fmt.fmt " (* " |> String.fmt ~pretty:true alias |> Fmt.fmt " *)" + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t =\n" + |> fmt_tokens + +let expand_ml_token_index symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_token_indexes formatter = begin + let formatter, _first = Symbols.tokens_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {index; name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> (fun formatter -> + match qtype.explicit_opt with + | None -> formatter + | Some _ -> formatter |> Fmt.fmt " _" + ) + |> Fmt.fmt " -> " + |> ml_uns_pp index + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "let index = function\n" + |> fmt_token_indexes + +let expand_ml_tokens symbols ~indentation formatter = + formatter + |> expand_ml_token_type symbols ~indentation |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> expand_ml_token_index symbols ~indentation + +let expand_ml_nonterm_type symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterms formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + end + | {explicit_opt=Some {module_; type_}; _} -> begin + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " of " + |> Fmt.fmt module_ + |> Fmt.fmt "." + |> Fmt.fmt type_ + end + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "type t =\n" + |> fmt_nonterms + +let expand_ml_nonterm_index symbols ~indentation formatter = + let indent = mk_indent indentation in + let fmt_nonterm_indexes formatter = begin + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {index; name; _} -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent + |> Fmt.fmt " | " + |> Fmt.fmt name + |> Fmt.fmt " _ -> " + |> ml_uns_pp index + ), + false + ) symbols + in + formatter + end in + formatter + |> indent |> Fmt.fmt "let index = function\n" + |> fmt_nonterm_indexes + +let expand_ml_nonterms symbols ~indentation formatter = + formatter + |> expand_ml_nonterm_type symbols ~indentation |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> expand_ml_nonterm_index symbols ~indentation + +let expand_ml_callbacks hocc_block symbols callbacks ~indentation formatter = + let fmt_callbacks ~indentation formatter = begin + let indent = mk_indent indentation in + let formatter, _first = Callbacks.fold ~init:(formatter, true) + ~f:(fun (formatter, first) (Callback.{index; lhs_name; rhs; code; _} as callback) -> + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt ";\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "(* " |> Callback.Index.pp index + |> Fmt.fmt " *) " + |> (fun formatter -> + match Option.is_empty code with + | false -> begin + let underline = Codepoint.of_char '_' in + let overline = Codepoint.kv 0x203eL (*'‾'*) in + let code = Option.value_hlt code in + let source = Parse.source_of_code hocc_block code in + formatter + |> Fmt.fmt "(function\n" + |> (fun formatter -> + let formatter, _first = + Callback.Params.fold_right ~init:(formatter, true) + ~f:(fun (formatter, first) + Callback.Param.{binding; symbol_name; _} -> + let is_token = + Symbols.symbol_of_name symbol_name symbols + |> Option.value_hlt + |> Symbol.is_token + in + let symbol_constructor = match is_token with + | true -> "Token" + | false -> "Nonterm" + in + formatter + |> indent + |> Fmt.fmt (match first with + | true -> " | " + | false -> " :: " + ) + |> (fun formatter -> + match binding with + | Some uname -> begin + formatter + |> Fmt.fmt "Elm.{symbol=Symbol." + |> Fmt.fmt symbol_constructor + |> Fmt.fmt " (" + |> Fmt.fmt symbol_name + |> Fmt.fmt " " + |> Fmt.fmt uname + |> Fmt.fmt "); _}" + end + | None -> formatter |> Fmt.fmt "_" + ) + |> Fmt.fmt "\n" + , false + ) rhs + in + formatter + ) + |> indent |> Fmt.fmt " " + |> Fmt.fmt (match Callback.is_epsilon callback with false -> ":: " | true -> "") + |> Fmt.fmt "tl -> Symbol.Nonterm (" + |> Fmt.fmt lhs_name |> Fmt.fmt " (\n" + |> indent + |> String.fmt ~pad:underline ~just:Fmt.Left ~width:(98L - indentation) " (*" + |> Fmt.fmt "*)" + |> fmt_ml_source_directive source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + |> Fmt.fmt "\n" + |> indent + |> String.fmt ~pad:overline ~just:Fmt.Left ~width:(98L - indentation) " (*" + |> Fmt.fmt "*)\n" + |> indent |> Fmt.fmt " )), tl\n" + |> (fun formatter -> + match Callback.is_epsilon callback with + | false -> formatter |> indent |> Fmt.fmt " | _ -> not_reached ()\n" + | true -> formatter + ) + |> indent |> Fmt.fmt ")" + end + | true -> formatter |> Fmt.fmt "(fun _stack -> not_reached ())" + ) + ), + false + ) callbacks + in + formatter + end in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let callbacks = [|\n" + |> fmt_callbacks ~indentation:(indentation+4L) |> Fmt.fmt "\n" + |> indent |> Fmt.fmt " |]" + +let expand_ml_starts symbols states ~indentation formatter = + let indent = mk_indent indentation in + let formatter, _first = Symbols.nonterms_fold ~init:(formatter, true) + ~f:(fun (formatter, first) {name; qtype={synthetic; _}; start; _} -> + match (start && (not synthetic)) with + | false -> formatter, first + | true -> begin + let synthetic_name = Spec.synthetic_name_of_start_name name in + let synthetic_start_symbol = + Symbols.symbol_of_name synthetic_name symbols |> Option.value_hlt in + let state = + state_of_synthetic_start_symbol symbols states synthetic_start_symbol in + formatter + |> (fun formatter -> + match first with + | true -> formatter + | false -> formatter |> Fmt.fmt "\n" + ) + |> (fun formatter -> + formatter + |> indent |> Fmt.fmt "module " |> String.fmt name |> Fmt.fmt " = struct\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "let boi = {\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "stack=[{\n" + |> (fun formatter -> + let indentation = indentation + 4L in + let indent = mk_indent indentation in + formatter + |> indent |> Fmt.fmt "symbol=Token Token.EPSILON;\n" + |> indent |> Fmt.fmt "state=State.init " |> State.(index state |> ml_uns_pp) + |> Fmt.fmt ";\n" + ) + |> indent |> Fmt.fmt " }];\n" + |> indent |> Fmt.fmt "status=Prefix;\n" + ) + |> indent |> Fmt.fmt " }\n" + ) + |> indent |> Fmt.fmt " end" + ), + false + end + ) symbols + in + formatter + +let expand_ml_template template_indentation template hocc_block + Spec.{algorithm; precs; symbols; prods; callbacks; states} formatter = + let expanders = Map.of_alist (module String) [ + ("«algorithm»", expand_ml_algorithm algorithm); + ("«precs»", expand_ml_precs precs); + ("«prods»", expand_ml_prods prods); + ("«symbols»", expand_ml_symbols symbols); + ("«states»", expand_ml_states states); + ("«tokens»", expand_ml_tokens symbols); + ("«nonterms»", expand_ml_nonterms symbols); + ("«callbacks»", expand_ml_callbacks hocc_block symbols callbacks); + ("«starts»", expand_ml_starts symbols states) + ] in + formatter |> expand ~template_indentation template expanders + +let generate_ml conf + Parse.(Hmh {prelude; hocc_=(Hocc {hocc_; _} as hocc_block); postlude; eoi}) io spec = + assert (Spec.conflicts spec = 0L); + let indentation = indentation_of_hocc hocc_ in + let module_name = module_name conf in + let hmh_name = module_name ^ ".hmh" in + let io = + io.ml + |> Fmt.fmt "(* This file was generated by `hocc` based on " + |> Fmt.fmt (String.to_string ~pretty:true hmh_name) + |> Fmt.fmt " *)\n" + + |> (fun formatter -> + match prelude with + | Parse.Matter {token_; _} -> begin + let base = match token_ with + | HmcToken {source; _} -> Hmc.Source.Slice.base source + | HoccToken _ -> not_reached () + in + let past = match hocc_ with + | HmcToken _ -> not_reached () + | HoccToken {source; _} -> Hmc.Source.Slice.base source + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> expand_ml_template indentation ml_template hocc_block spec + |> (fun formatter -> + match postlude with + | Parse.Matter _ -> begin + let base = Parse.postlude_base_of_hocc hocc_block in + let past = match eoi with + | HmcToken {source; _} -> Hmc.Source.Slice.past source + | HoccToken _ -> not_reached () + in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + formatter + |> fmt_ml_source_directive source + |> Fmt.fmt (Hmc.Source.Slice.to_string source) + end + | MatterEpsilon -> formatter + ) + |> Io.with_ml io + in + io diff --git a/bootstrap/bin/hocc/code.mli b/bootstrap/bin/hocc/code.mli new file mode 100644 index 000000000..5c208e258 --- /dev/null +++ b/bootstrap/bin/hocc/code.mli @@ -0,0 +1,19 @@ +(** Hemlock/OCaml code generation. *) + +open! Basis +open! Basis.Rudiments + +val generate_hmi: Conf.t -> Parse.nonterm_hmhi -> Io.t -> Spec.t -> Io.t +(** [generate_hmi conf hmhi io spec] integrates a Hemlock interface (.hmi) representation of [spec] + into [io]. +*) + +val generate_hm: Conf.t -> Parse.nonterm_hmh -> Io.t -> Spec.t -> Io.t +(** [generate_hm conf hmh io spec] integrates a Hemlock (.hm) representation of [spec] into [io]. *) + +val generate_mli: Conf.t -> Parse.nonterm_hmhi -> Io.t -> Spec.t -> Io.t +(** [generate_mli conf hmhi io spec] integrates an OCaml interface (.mli) representation of [spec] + into [io]. *) + +val generate_ml: Conf.t -> Parse.nonterm_hmh -> Io.t -> Spec.t -> Io.t +(** [generate_ml conf hmh io spec] integrates an OCaml (.ml) representation of [spec] into [io]. *) diff --git a/bootstrap/bin/hocc/conf.ml b/bootstrap/bin/hocc/conf.ml index f5a3faa42..9473e79eb 100644 --- a/bootstrap/bin/hocc/conf.ml +++ b/bootstrap/bin/hocc/conf.ml @@ -71,12 +71,11 @@ Parameters: -h[elp] : Print command usage and exit. -v[erbose] : Print progress information during parser generation. -txt | -text : Write a detailed automoton description in plain text - format to "/hocc/.txt". + format to "/hocc/.txt". -html : Write a detailed automoton description in internally - hyperlinked HTML format to - "/hocc/.html". + hyperlinked HTML format to "/hocc/.html". -hmh | -hocc : Write a complete grammar specification in hocc format to - "/hocc/.hmh", but with all non-terminal + "/hocc/.hmh", but with all non-terminal types and reduction code omitted. -a[lgorithm] : Use the specified orithm for generating an automoton. Defaults to lr1. @@ -92,9 +91,9 @@ Parameters: -r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults to yes. -hm | -hemlock : Generate a Hemlock-based parser implementation and write - it to "/.hm[i]". + it to "/.hm[i]". -ml | -ocaml : Generate an OCaml-based parser implementation and write it - to "/.ml[i]". This is brittle + to "/.ml[i]". This is brittle functionality intended only for Hemlock bootstrapping. -s[rc] : Path and module name of input source, where inputs match ".hmh[i]" and "" comprises the source directory diff --git a/bootstrap/bin/hocc/description.ml b/bootstrap/bin/hocc/description.ml new file mode 100644 index 000000000..2f0dfcb76 --- /dev/null +++ b/bootstrap/bin/hocc/description.ml @@ -0,0 +1,519 @@ +open Basis +open! Basis.Rudiments + +type description = + | DescriptionTxt + | DescriptionHtml + +let generate_description conf io description Spec.{algorithm; precs; symbols; prods; states; _} = + let sink _ formatter = formatter in + let passthrough s formatter = formatter |> Fmt.fmt s in + let txt = match description with + | DescriptionTxt -> passthrough + | DescriptionHtml -> sink + in + let html = match description with + | DescriptionTxt -> sink + | DescriptionHtml -> passthrough + in + let pp_symbol_index symbol_index formatter = begin + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + let pretty_name = match symbol.alias with + | None -> symbol.name + | Some alias -> + String.Fmt.empty + |> txt "\"" |> html "“" + |> Fmt.fmt alias + |> txt "\"" |> html "”" + |> Fmt.to_string + in + formatter |> html " html symbol.name |> html "\">" + |> Fmt.fmt pretty_name |> html "" + end in + let pp_symbol_set symbol_set formatter = begin + formatter + |> Fmt.fmt "{" + |> (fun formatter -> + Ordset.foldi ~init:formatter ~f:(fun i formatter symbol_index -> + formatter + |> (fun formatter -> match i with 0L -> formatter | _ -> formatter |> Fmt.fmt ", ") + |> pp_symbol_index symbol_index + ) symbol_set + ) + |> Fmt.fmt "}" + end in + let pp_prec prec_ind formatter = begin + let ref_name = (Precs.prec_of_prec_index prec_ind precs).name in + formatter + |> Fmt.fmt "prec " |> html " html ref_name |> html "\">" + |> Fmt.fmt ref_name + |> html "" + end in + let pp_prod ?(do_pp_prec=true) Prod.{lhs_index; rhs_indexes; prec; _} formatter = begin + let lhs_name = Symbol.name (Symbols.symbol_of_symbol_index lhs_index symbols) in + formatter + |> html " html lhs_name |> html "\">" + |> Fmt.fmt lhs_name + |> html "" |> Fmt.fmt " ::=" + |> (fun formatter -> + match Array.length rhs_indexes with + | 0L -> formatter |> Fmt.fmt " epsilon" + | _ -> begin + Array.fold ~init:formatter ~f:(fun formatter rhs_index -> + let rhs_name = Symbol.name (Symbols.symbol_of_symbol_index rhs_index symbols) in + formatter + |> Fmt.fmt " " + |> html " html rhs_name |> html "\">" + |> pp_symbol_index rhs_index + |> html "" + ) rhs_indexes + end + ) + |> (fun formatter -> + match do_pp_prec, prec with + | false, _ + | _, None -> formatter + | true, Some {index=prec_ind; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_ind + ) + end in + let pp_lr0item lr0item formatter = begin + let Lr0Item.{prod; dot} = lr0item in + let Prod.{lhs_index; rhs_indexes; _} = prod in + formatter + |> Fmt.fmt (Symbol.name (Symbols.symbol_of_symbol_index lhs_index symbols)) + |> Fmt.fmt " ::=" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter rhs_index -> + formatter + |> Fmt.fmt (match i = dot with + | false -> "" + | true -> " ·" + ) + |> Fmt.fmt " " + |> pp_symbol_index rhs_index + ) rhs_indexes + |> Fmt.fmt ( + match Array.length rhs_indexes = dot with + | false -> "" + | true -> " ·" + ) + ) + end in + let pp_lr1item ?(do_pp_prec=true) lr1item formatter = begin + let Lr1Item.{lr0item; _} = lr1item in + let Lr0Item.{prod; _} = lr0item in + let Prod.{prec; _} = prod in + formatter + |> Fmt.fmt "[" + |> pp_lr0item lr0item + |> Fmt.fmt ", {" + |> (fun formatter -> + Array.foldi ~init:formatter ~f:(fun i formatter symbol_index -> + formatter + |> Fmt.fmt (match i with + | 0L -> "" + | _ -> ", " + ) + |> pp_symbol_index symbol_index + ) (Ordset.to_array Lr1Item.(lr1item.follow)) + ) + |> Fmt.fmt "}]" + |> (fun formatter -> + match do_pp_prec, prec with + | false, _ + | _, None -> formatter + | true, Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index + ) + end in + let pp_state_index state_index formatter = begin + let state_index_string = String.Fmt.empty |> State.Index.pp state_index |> Fmt.to_string in + formatter + |> html " html state_index_string |> html "\">" + |> Fmt.fmt state_index_string + |> html "" + end in + let pp_action symbol_index action formatter = begin + let pp_symbol_prec symbol_index formatter = begin + let symbol = Symbols.symbol_of_symbol_index symbol_index symbols in + match symbol.prec with + | None -> formatter + | Some Prec.{index; _} -> formatter |> Fmt.fmt " " |> pp_prec index + end in + let pp_reduce_prec Prod.{lhs_index; prec; _} formatter = begin + match prec with + | Some _ -> formatter + | None -> formatter |> pp_symbol_prec lhs_index + end in + let open State.Action in + match action with + | ShiftPrefix state_index -> + formatter + |> Fmt.fmt "ShiftPrefix " |> pp_state_index state_index + |> pp_symbol_prec symbol_index + | ShiftAccept state_index -> + formatter + |> Fmt.fmt "ShiftAccept " |> pp_state_index state_index + |> pp_symbol_prec symbol_index + | Reduce prod_index -> begin + let prod = Prods.prod_of_prod_index prod_index prods in + formatter |> Fmt.fmt "Reduce " |> pp_prod prod + |> pp_reduce_prec prod + end + end in + let pp_contrib contrib formatter = begin + assert ((Contrib.length contrib) = 1L); + assert (not (Contrib.mem_shift contrib)); + let prod_index = Contrib.reduces contrib |> Ordset.choose_hlt in + let prod = Prods.prod_of_prod_index prod_index prods in + formatter + |> Fmt.fmt "Reduce " + |> pp_prod ~do_pp_prec:false prod + end in + let io = + io.log + |> Fmt.fmt "hocc: Generating " + |> txt "text" |> html "html" + |> Fmt.fmt " report\n" + |> Io.with_log io + in + let nprecs = Precs.length precs in + let states_algorithm = match Conf.algorithm conf with + | Lr1 -> "LR(1)" + | Ielr1 -> "IELR(1)" + | Pgm1 -> "PGM(1)" + | Lalr1 -> "LALR(1)" + in + (match description with + | DescriptionTxt -> io.txt + | DescriptionHtml -> io.html + ) + |> html "\n" + |> html "\n" + |> html "

" |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) + |> Fmt.fmt " grammar" |> html "

" |> Fmt.fmt "\n" + |> Fmt.fmt "\n" + |> html "

Sections

\n" + |> html " \n" + |> html "
\n" + |> (fun formatter -> match nprecs with + | 0L -> formatter + | _ -> + formatter |> html "

" |> Fmt.fmt "Precedences" + |> (fun formatter -> match (Conf.resolve conf) with + | true -> formatter + | false -> formatter |> Fmt.fmt " (conflict resolution disabled)" + ) + |> html "

" + |> Fmt.fmt"\n" + ) + |> html "
    \n" + |> (fun formatter -> + Precs.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; doms; _} -> + formatter + |> Fmt.fmt " " |> html "
  • " + |> Fmt.fmt (match assoc with + | None -> "neutral" + | Some Left -> "left" + | Some Right -> "right" + ) + |> Fmt.fmt " " |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match Ordset.is_empty doms with + | true -> formatter + | false -> begin + let _, formatter = Ordset.fold ~init:(true, formatter) + ~f:(fun (first, formatter) prec_ind -> + let ref_name = (Precs.prec_of_prec_index prec_ind precs).name in + let formatter = + formatter + |> Fmt.fmt (match first with + | true -> " < " + | false -> ", " + ) + |> html " html ref_name |> html "\">" + |> Fmt.fmt ref_name + |> html "" + in + (false, formatter) + ) doms + in + formatter + end + ) + |> html "
  • " |> Fmt.fmt "\n" + ) precs + ) + |> html "
\n" + |> html "

" |> Fmt.fmt "Tokens" |> html "

" |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter + ~f:(fun formatter (Symbol.{name; alias; qtype; prec; first; follow; _} as symbol) -> + match Symbol.is_token symbol with + | false -> formatter + | true -> begin + formatter + |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "token " + |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match alias with + | None -> formatter + | Some alias -> formatter |> Fmt.fmt " " |> String.pp alias + ) + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> formatter + | {explicit_opt=Some {module_; type_}; _} -> + formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ + ) + |> (fun formatter -> + match prec with + | None -> formatter + | Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index + ) + |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " + |> pp_symbol_set first + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " + |> pp_symbol_set follow + |> html "
    • " |> Fmt.fmt "\n" + |> html "
    \n" + |> html "
  • \n" + end + ) symbols + ) + |> html "
\n" + |> html "

" |> Fmt.fmt "Non-terminals" |> html "

" |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter + ~f:(fun formatter (Symbol.{name; start; qtype; prods; first; follow; _} as symbol) -> + match Symbol.is_nonterm symbol with + | false -> formatter + | true -> begin + formatter + |> Fmt.fmt " " |> html "
  • " + |> Fmt.fmt (match start with + | true -> "start " + | false -> "nonterm " + ) + |> html " html name |> html "\">" + |> Fmt.fmt name + |> html "" + |> (fun formatter -> + match qtype with + | {explicit_opt=None; _} -> formatter + | {explicit_opt=Some {module_; type_}; _} -> + formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ + ) + |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " + |> pp_symbol_set first + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " + |> pp_symbol_set follow + |> html "
    • " |> Fmt.fmt "\n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Productions\n" + |> html "
        \n" + |> (fun formatter -> + Ordset.fold ~init:formatter + ~f:(fun formatter prod -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_prod prod + |> html "
      • " |> Fmt.fmt "\n" + ) prods + |> html "
      \n" + |> html "
    • \n" + ) + |> html "
    \n" + |> html "
  • \n" + end + ) symbols + ) + |> html "
\n" + |> html "

" |> Fmt.fmt states_algorithm |> Fmt.fmt " States" |> html "

" + |> Fmt.fmt "\n" + |> html "
    \n" + |> (fun formatter -> + Array.fold ~init:formatter + ~f:(fun formatter (State.{statenub; actions; gotos; _} as state) -> + let state_index_string = + String.Fmt.empty |> StateNub.Index.pp (StateNub.index statenub) + |> Fmt.to_string in + formatter + |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "State " + |> html " html state_index_string |> html "\">" + |> Fmt.fmt state_index_string + |> (fun formatter -> + match algorithm with + | Lr1 + | Ielr1 + | Pgm1 -> begin + formatter + |> Fmt.fmt " [" + |> Uns.pp (StateNub.isocores_sn statenub) + |> Fmt.fmt "." + |> Uns.pp (StateNub.isocore_set_sn statenub) + |> Fmt.fmt "]" + end + | Lalr1 -> formatter + ) + |> html "" |> Fmt.fmt "\n" + |> html "
      \n" + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Kernel\n" + |> html "
        \n" + |> (fun formatter -> + Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_lr1item lr1itemset + |> html "
      • " |> Fmt.fmt "\n" + ) statenub.lr1itemsetclosure.kernel + ) + |> html "
      \n" + |> html "
    • \n" + |> (fun formatter -> + match Lr1Itemset.is_empty statenub.lr1itemsetclosure.added with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Added\n" + |> html "
        \n" + |> (fun formatter -> + Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> + formatter |> Fmt.fmt " " |> html "
      • " + |> pp_lr1item lr1itemset + |> html "
      • " |> Fmt.fmt "\n" + ) statenub.lr1itemsetclosure.added + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> (fun formatter -> + let has_pseudo_end_conflict = State.has_pseudo_end_conflict state in + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Actions\n" + |> html "
        \n" + |> (fun formatter -> + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, action_set) -> + formatter + |> (fun formatter -> + match has_pseudo_end_conflict && symbol_index = Symbol.pseudo_end.index with + | false -> formatter |> Fmt.fmt " " |> html "
      • " + | true -> formatter |> txt "CONFLICT " |> html "
      • CONFLICT " + ) + |> pp_symbol_index symbol_index |> Fmt.fmt " :" + |> (fun formatter -> + match Ordset.length action_set with + | 1L -> begin + formatter + |> Fmt.fmt " " + |> pp_action symbol_index (Ordset.choose_hlt action_set) + |> html "
      • " |> Fmt.fmt "\n" + end + | _ -> begin + formatter + |> html " CONFLICT" |> Fmt.fmt "\n" + |> html "
          \n" + |> (fun formatter -> + Ordset.fold ~init:formatter ~f:(fun formatter action -> + formatter + |> txt "CONFLICT " |> html "
        • " + |> pp_action symbol_index action + |> html "
        • " |> Fmt.fmt "\n" + ) action_set + ) + |> html "
        \n" + end + ) + ) actions + ) + |> html "
      \n" + |> html "
    • \n" + ) + |> (fun formatter -> + match Ordmap.is_empty gotos with + | true -> formatter + | false -> begin + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Gotos\n" + |> html "
        \n" + |> (fun formatter -> + Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, state_index) -> + formatter + |> Fmt.fmt " " |> html "
      • " + |> pp_symbol_index symbol_index |> Fmt.fmt " : " |> State.Index.pp state_index + |> html "
      • " |> Fmt.fmt "\n" + ) gotos + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> (fun formatter -> + let kernel_attribs = StateNub.filtered_kernel_attribs statenub in + match KernelAttribs.length kernel_attribs with + | 0L -> formatter + | _ -> begin + let kernel_attribs = StateNub.filtered_kernel_attribs statenub in + formatter + |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Conflict contributions\n" + |> html "
        \n" + |> (fun formatter -> + KernelAttribs.fold ~init:formatter ~f:(fun formatter (kernel_item, attribs) -> + formatter + |> Fmt.fmt " " |> pp_lr1item ~do_pp_prec:false kernel_item + |> Fmt.fmt "\n" + |> html "
          \n" + |> (fun formatter -> + Attribs.fold ~init:formatter + ~f:(fun formatter Attrib.{conflict_state_index; contrib; _} -> + formatter + |> Fmt.fmt " " |> html "
        • " + |> pp_state_index conflict_state_index + |> Fmt.fmt " : " + |> pp_contrib contrib + |> html "
        • " |> Fmt.fmt "\n" + ) attribs + ) + |> html "
        \n" + ) kernel_attribs + ) + |> html "
      \n" + |> html "
    • \n" + end + ) + |> html "
    \n" + |> html "
  • \n" + ) states + ) + |> html "
\n" + |> html "\n" + |> html "\n" + |> (match description with + | DescriptionTxt -> Io.with_txt io + | DescriptionHtml -> Io.with_html io + ) + +let generate_txt conf io spec = + generate_description conf io DescriptionTxt spec + +let generate_html conf io spec = + generate_description conf io DescriptionHtml spec diff --git a/bootstrap/bin/hocc/description.mli b/bootstrap/bin/hocc/description.mli new file mode 100644 index 000000000..b8d8d0f24 --- /dev/null +++ b/bootstrap/bin/hocc/description.mli @@ -0,0 +1,7 @@ +(** Text/html description generation. *) + +val generate_txt: Conf.t -> Io.t -> Spec.t -> Io.t +(** [generate_txt conf io spec] integrates a text representation of [spec] into [io]. *) + +val generate_html: Conf.t -> Io.t -> Spec.t -> Io.t +(** [generate_html conf io spec] integrates an html representation of [spec] into [io]. *) diff --git a/bootstrap/bin/hocc/grammar.ml b/bootstrap/bin/hocc/grammar.ml new file mode 100644 index 000000000..3c2060673 --- /dev/null +++ b/bootstrap/bin/hocc/grammar.ml @@ -0,0 +1,27 @@ +open Basis +open! Basis.Rudiments + +let generate_hocc io Spec.{precs; symbols; _} = + let io = io.log |> Fmt.fmt "hocc: Generating hocc report\n" |> Io.with_log io in + io.hocc + |> Fmt.fmt "hocc\n" + |> (fun formatter -> + Precs.fold ~init:formatter ~f:(fun formatter prec -> + formatter |> Prec.src_fmt prec + ) precs + ) + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> + match Symbol.is_token symbol && not (Symbol.is_synthetic symbol) with + | false -> formatter + | true -> formatter |> Symbols.src_fmt symbol symbols + ) symbols + ) + |> (fun formatter -> + Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> + match Symbol.is_nonterm symbol && not (Symbol.is_synthetic symbol) with + | false -> formatter + | true -> formatter |> Symbols.src_fmt symbol symbols + ) symbols + ) + |> Io.with_hocc io diff --git a/bootstrap/bin/hocc/grammar.mli b/bootstrap/bin/hocc/grammar.mli new file mode 100644 index 000000000..bb5d0de64 --- /dev/null +++ b/bootstrap/bin/hocc/grammar.mli @@ -0,0 +1,4 @@ +(** hocc grammar generation. *) + +val generate_hocc: Io.t -> Spec.t -> Io.t +(** [generate_hocc conf io spec] integrates a hocc representation of [spec]'s grammar into [io]. *) diff --git a/bootstrap/bin/hocc/hocc.ml b/bootstrap/bin/hocc/hocc.ml index 9796057bd..58f7964dd 100644 --- a/bootstrap/bin/hocc/hocc.ml +++ b/bootstrap/bin/hocc/hocc.ml @@ -45,35 +45,57 @@ let _ = let io, spec = Spec.init (Conf.algorithm conf) ~resolve:(Conf.resolve conf) io hmh in let io = match Conf.text conf with | false -> io - | true -> Spec.to_txt conf io spec + | true -> Description.generate_txt conf io spec in let io = match Conf.html conf with | false -> io - | true -> Spec.to_html conf io spec + | true -> Description.generate_html conf io spec in let io = match Conf.hocc conf with | false -> io - | true -> Spec.to_hocc io spec + | true -> Grammar.generate_hocc io spec in + let nconflicts = Spec.conflicts spec in + let conflicts = nconflicts <> 0L in let io = match Conf.hemlock conf with | false -> io | true -> begin - let io = match hmhi_opt with - | None -> io - | Some hmhi -> Spec.to_hmi conf hmhi io spec - in - Spec.to_hm conf hmh io spec + match conflicts with + | false -> begin + let io = match hmhi_opt with + | None -> io + | Some hmhi -> Code.generate_hmi conf hmhi io spec + in + Code.generate_hm conf hmh io spec + end + | true -> begin + io.err + |> Fmt.fmt "hocc: Hemlock code not generated due to conflict" + |> Fmt.fmt (match nconflicts with 1L -> "" | _ -> "s") + |> Fmt.fmt "\n" + |> Io.with_err io + end end in let io = match Conf.ocaml conf with | false -> io | true -> begin - let io = match hmhi_opt with - | None -> io - | Some hmhi -> Spec.to_mli conf hmhi io spec - in - Spec.to_ml conf hmh io spec + match conflicts with + | false -> begin + let io = match hmhi_opt with + | None -> io + | Some hmhi -> Code.generate_mli conf hmhi io spec + in + Code.generate_ml conf hmh io spec + end + | true -> begin + io.err + |> Fmt.fmt "hocc: OCaml code not generated due to conflict" + |> Fmt.fmt (match nconflicts with 1L -> "" | _ -> "s") + |> Fmt.fmt "\n" + |> Io.with_err io + end end in - let _io = Io.fini conf io in + let _io = Io.fini conf conflicts io in () diff --git a/bootstrap/bin/hocc/io.ml b/bootstrap/bin/hocc/io.ml index a1288afbe..70ae0ff08 100644 --- a/bootstrap/bin/hocc/io.ml +++ b/bootstrap/bin/hocc/io.ml @@ -126,25 +126,30 @@ let open_outfile_as_formatter ~is_report ~err path = | Ok f -> File.Fmt.of_t f | Error error -> open_error ~err path error -let fini_formatter ?(is_report=false) conf ~err ~log formatter suffix = - match Fmt.sync formatter with - | To_string s -> begin - let path = path_with_suffix ~is_report conf suffix in - let log' = log |> Fmt.fmt "hocc: Writing " |> Path.pp path |> Fmt.fmt "\n" in - let formatter = open_outfile_as_formatter ~is_report ~err path in - let formatter' = formatter |> Fmt.fmt s |> Fmt.flush in - log', formatter' +let fini_formatter ?(is_report=false) conf conflicts ~err ~log formatter suffix = + match is_report || (not conflicts) with + | true -> begin + match Fmt.sync formatter with + | To_string s -> begin + let path = path_with_suffix ~is_report conf suffix in + let log' = log |> Fmt.fmt "hocc: Writing " |> Path.pp path |> Fmt.fmt "\n" in + let formatter = open_outfile_as_formatter ~is_report ~err path in + let formatter' = formatter |> Fmt.fmt s |> Fmt.flush in + log', formatter' + end + | Synced formatter' -> log, formatter' end - | Synced formatter' -> log, formatter' - -let fini conf ({err; log; txt; html; hocc; hmi; hm; mli; ml; _} as t) = - let log, txt = fini_formatter ~is_report:true conf ~err ~log txt ".txt" in - let log, html = fini_formatter ~is_report:true conf ~err ~log html ".html" in - let log, hocc = fini_formatter ~is_report:true conf ~err ~log hocc ".hmh" in - let log, hmi = fini_formatter conf ~err ~log hmi ".hmi" in - let log, hm = fini_formatter conf ~err ~log hm ".hm" in - let log, mli = fini_formatter conf ~err ~log mli ".mli" in - let log, ml = fini_formatter conf ~err ~log ml ".ml" in + | false -> log, formatter + +let fini conf conflicts ({err; log; txt; html; hocc; hmi; hm; mli; ml; _} as t) = + let log, txt = fini_formatter ~is_report:true conf conflicts ~err ~log txt ".txt" in + let log, html = fini_formatter ~is_report:true conf conflicts ~err ~log html ".html" in + let log, hocc = fini_formatter ~is_report:true conf conflicts ~err ~log hocc ".hmh" in + + let log, hmi = fini_formatter conf conflicts ~err ~log hmi ".hmi" in + let log, hm = fini_formatter conf conflicts ~err ~log hm ".hm" in + let log, mli = fini_formatter conf conflicts ~err ~log mli ".mli" in + let log, ml = fini_formatter conf conflicts ~err ~log ml ".ml" in let log = Fmt.flush log in {t with log; txt; html; hocc; hmi; hm; mli; ml} diff --git a/bootstrap/bin/hocc/io.mli b/bootstrap/bin/hocc/io.mli index c084578d8..7add211f4 100644 --- a/bootstrap/bin/hocc/io.mli +++ b/bootstrap/bin/hocc/io.mli @@ -6,6 +6,7 @@ conditional output logic. *) open Basis +open! Basis.Rudiments type t = { err: (module Fmt.Formatter); @@ -24,8 +25,9 @@ type t = { val init: Conf.t -> t (** [init conf] initializes formatters according to [conf]. *) -val fini: Conf.t -> t -> t -(** Write and flush results to files. *) +val fini: Conf.t -> bool -> t -> t +(** [fini conf conflicts t] writes and flushes results to files. If there are conflicts, only report + files are written. *) val fatal: t -> 'a (** Flush error output and exit. *) diff --git a/bootstrap/bin/hocc/lr0Itemset.ml b/bootstrap/bin/hocc/lr0Itemset.ml index 789cd3ba0..af0f10531 100644 --- a/bootstrap/bin/hocc/lr0Itemset.ml +++ b/bootstrap/bin/hocc/lr0Itemset.ml @@ -4,15 +4,11 @@ open! Basis.Rudiments module T = struct type t = (Lr0Item.t, Lr0Item.cmper_witness) Ordset.t - let hash_fold t = - Ordset.hash_fold t + let hash_fold = Ordset.hash_fold - let cmp t0 t1 = - Ordset.cmp t0 t1 + let cmp = Ordset.cmp - let pp t formatter = - formatter - |> Ordset.pp t + let pp = Ordset.pp end include T include Identifiable.Make(T) diff --git a/bootstrap/bin/hocc/lr1Itemset.ml b/bootstrap/bin/hocc/lr1Itemset.ml index dd7cdf7d7..8aab59cb7 100644 --- a/bootstrap/bin/hocc/lr1Itemset.ml +++ b/bootstrap/bin/hocc/lr1Itemset.ml @@ -168,6 +168,15 @@ let is_start {items; _} = is_start, is_start ) items +let start_symbol_index {items; _} = + Ordmap.fold_until ~init:None ~f:(fun _ (Lr0Item.{prod={lhs_index; _}; dot}, _lr1item) -> + let is_start = Uns.(dot = 0L) in + match is_start with + | false -> None, false + | true -> Some lhs_index, true + ) items + |> Option.value_hlt + let is_accept t = fold_until ~init:true ~f:(fun _ lr1item -> match Lr1Item.is_accept lr1item with is_accept -> is_accept, not is_accept diff --git a/bootstrap/bin/hocc/lr1Itemset.mli b/bootstrap/bin/hocc/lr1Itemset.mli index 73f777a58..4367d0e33 100644 --- a/bootstrap/bin/hocc/lr1Itemset.mli +++ b/bootstrap/bin/hocc/lr1Itemset.mli @@ -36,6 +36,10 @@ val is_start: t -> bool position of 0. Other LR(1) items can (and often do) exist in a grammar, but they are never inserted into an LR(1) item set. *) +val start_symbol_index: t -> Symbol.Index.t +(** [start_symbol_index t] returns the start symbol index of [t], where [t] must be a start state. +*) + val is_empty: t -> bool (** [is_empty t] returns true if [t] contains no LR(1) items. *) diff --git a/bootstrap/bin/hocc/parse.ml b/bootstrap/bin/hocc/parse.ml index 00ece0355..8be2abfb4 100644 --- a/bootstrap/bin/hocc/parse.ml +++ b/bootstrap/bin/hocc/parse.ml @@ -53,127 +53,133 @@ module Error = struct {source; msg} end -type uident = +type nonterm_uident = | Uident of {uident: Scan.Token.t} -and cident = +and nonterm_cident = | Cident of {cident: Scan.Token.t} -and ident = - | IdentUident of {uident: uident} - | IdentCident of {cident: cident} +and nonterm_ident = + | IdentUident of {uident: nonterm_uident} + | IdentCident of {cident: nonterm_cident} | IdentUscore of {uscore: Scan.Token.t} -and precs_tl = - | PrecsTlCommaUident of {comma: Scan.Token.t; uident: uident; precs_tl: precs_tl} +and nonterm_precs_tl = + | PrecsTlCommaUident of {comma: Scan.Token.t; uident: nonterm_uident; precs_tl: nonterm_precs_tl} | PrecsTlEpsilon -and precs = - | Precs of {uident: uident; precs_tl: precs_tl} -and prec_rels = - | PrecRelsLtPrecs of {lt: Scan.Token.t; precs: precs} +and nonterm_precs = + | Precs of {uident: nonterm_uident; precs_tl: nonterm_precs_tl} +and nonterm_prec_rels = + | PrecRelsLtPrecs of {lt: Scan.Token.t; precs: nonterm_precs} | PrecRelsEpsilon -and prec_type = - | PrecTypeNeutral of {neutral: Scan.Token.t} - | PrecTypeLeft of {left: Scan.Token.t} - | PrecTypeRight of {right: Scan.Token.t} -and prec = - | Prec of {prec_type: prec_type; uident: uident; prec_rels: prec_rels} -and of_type = - | OfType of {of_: Scan.Token.t; type_module: cident; dot: Scan.Token.t; type_type: uident} -and of_type0 = - | OfType0OfType of {of_type: of_type} +and nonterm_prec_type = + | PrecTypeNeutral of {neutral_: Scan.Token.t} + | PrecTypeLeft of {left_: Scan.Token.t} + | PrecTypeRight of {right_: Scan.Token.t} +and nonterm_prec = + | Prec of {prec_type: nonterm_prec_type; uident: nonterm_uident; prec_rels: nonterm_prec_rels} +and nonterm_of_type = + | OfType of {of_: Scan.Token.t; type_module: nonterm_cident; dot: Scan.Token.t; + type_type: nonterm_uident} +and nonterm_of_type0 = + | OfType0OfType of {of_type: nonterm_of_type} | OfType0Epsilon -and prec_ref = - | PrecRefPrecUident of {prec: Scan.Token.t; uident: uident} +and nonterm_prec_ref = + | PrecRefPrecUident of {prec_: Scan.Token.t; uident: nonterm_uident} | PrecRefEpsilon -and token_alias = +and nonterm_token_alias = | TokenAlias of {alias: Scan.Token.t} | TokenAliasEpsilon -and token = - | Token of {token: Scan.Token.t; cident: cident; token_alias: token_alias; of_type0: of_type0; - prec_ref: prec_ref} -and sep = +and nonterm_token = + | Token of {token_: Scan.Token.t; cident: nonterm_cident; token_alias: nonterm_token_alias; + of_type0: nonterm_of_type0; prec_ref: nonterm_prec_ref} +and nonterm_sep = | SepLineDelim of {line_delim: Scan.Token.t} | SepSemi of {semi: Scan.Token.t} | SepBar of {bar: Scan.Token.t} -and codes_tl = - | CodesTlSepCode of {sep: sep; code: code; codes_tl: codes_tl} +and nonterm_codes_tl = + | CodesTlSepCode of {sep: nonterm_sep; code: nonterm_code; codes_tl: nonterm_codes_tl} | CodesTlEpsilon -and codes = - | Codes of {code: code; codes_tl: codes_tl} -and codes0 = - | Codes0Codes of {codes: codes} +and nonterm_codes = + | Codes of {code: nonterm_code; codes_tl: nonterm_codes_tl} +and nonterm_codes0 = + | Codes0Codes of {codes: nonterm_codes} | Codes0Epsilon -and delimited = - | DelimitedBlock of {indent: Scan.Token.t; codes: codes; dedent: Scan.Token.t} - | DelimitedParen of {lparen: Scan.Token.t; codes0: codes0; rparen: Scan.Token.t} - | DelimitedCapture of {lcapture: Scan.Token.t; codes0: codes0; rcapture: Scan.Token.t} - | DelimitedList of {lbrack: Scan.Token.t; codes0: codes0; rbrack: Scan.Token.t} - | DelimitedArray of {larray: Scan.Token.t; codes0: codes0; rarray: Scan.Token.t} - | DelimitedModule of {lcurly: Scan.Token.t; codes0: codes0; rcurly: Scan.Token.t} -and code_tl = - | CodeTlDelimited of {delimited: delimited; code_tl: code_tl} - | CodeTlToken of {token: Scan.Token.t; code_tl: code_tl} +and nonterm_delimited = + | DelimitedBlock of {indent: Scan.Token.t; codes: nonterm_codes; dedent: Scan.Token.t} + | DelimitedParen of {lparen: Scan.Token.t; codes0: nonterm_codes0; rparen: Scan.Token.t} + | DelimitedCapture of {lcapture: Scan.Token.t; codes0: nonterm_codes0; rcapture: Scan.Token.t} + | DelimitedList of {lbrack: Scan.Token.t; codes0: nonterm_codes0; rbrack: Scan.Token.t} + | DelimitedArray of {larray: Scan.Token.t; codes0: nonterm_codes0; rarray: Scan.Token.t} + | DelimitedModule of {lcurly: Scan.Token.t; codes0: nonterm_codes0; rcurly: Scan.Token.t} +and nonterm_code_tl = + | CodeTlDelimited of {delimited: nonterm_delimited; code_tl: nonterm_code_tl} + | CodeTlToken of {token_: Scan.Token.t; code_tl: nonterm_code_tl} | CodeTlEpsilon -and code = - | CodeDelimited of {delimited: delimited; code_tl: code_tl} - | CodeToken of {token: Scan.Token.t; code_tl: code_tl} -and prod_param_symbol = - | ProdParamSymbolCident of {cident: cident} +and nonterm_code = + | CodeDelimited of {delimited: nonterm_delimited; code_tl: nonterm_code_tl} + | CodeToken of {token_: Scan.Token.t; code_tl: nonterm_code_tl} +and nonterm_prod_param_symbol = + | ProdParamSymbolCident of {cident: nonterm_cident} | ProdParamSymbolAlias of {alias: Scan.Token.t} -and prod_param = - | ProdParamBinding of {ident: ident; colon: Scan.Token.t; prod_param_symbol: prod_param_symbol} - | ProdParam of {prod_param_symbol: prod_param_symbol} -and prod_params_tl = - | ProdParamsTlProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} +and nonterm_prod_param = + | ProdParamBinding of {ident: nonterm_ident; colon: Scan.Token.t; + prod_param_symbol: nonterm_prod_param_symbol} + | ProdParam of {prod_param_symbol: nonterm_prod_param_symbol} +and nonterm_prod_params_tl = + | ProdParamsTlProdParam of {prod_param: nonterm_prod_param; + prod_params_tl: nonterm_prod_params_tl} | ProdParamsTlEpsilon -and prod_params = - | ProdParamsProdParam of {prod_param: prod_param; prod_params_tl: prod_params_tl} -and prod_pattern = - | ProdPatternParams of {prod_params: prod_params} - | ProdPatternEpsilon of {epsilon: Scan.Token.t} -and prod = - | Prod of {prod_pattern: prod_pattern; prec_ref: prec_ref} -and prods_tl = - | ProdsTlBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} +and nonterm_prod_params = + | ProdParamsProdParam of {prod_param: nonterm_prod_param; prod_params_tl: nonterm_prod_params_tl} +and nonterm_prod_pattern = + | ProdPatternParams of {prod_params: nonterm_prod_params} + | ProdPatternEpsilon of {epsilon_: Scan.Token.t} +and nonterm_prod = + | Prod of {prod_pattern: nonterm_prod_pattern; prec_ref: nonterm_prec_ref} +and nonterm_prods_tl = + | ProdsTlBarProd of {bar: Scan.Token.t; prod: nonterm_prod; prods_tl: nonterm_prods_tl} | ProdsTlEpsilon -and prods = - | ProdsBarProd of {bar: Scan.Token.t; prod: prod; prods_tl: prods_tl} - | ProdsProd of {prod: prod; prods_tl: prods_tl} -and reduction = - | Reduction of {prods: prods; arrow: Scan.Token.t; code: code} -and reductions_tl = - | ReductionsTlBarReduction of {bar: Scan.Token.t; reduction: reduction; - reductions_tl: reductions_tl} +and nonterm_prods = + | ProdsBarProd of {bar: Scan.Token.t; prod: nonterm_prod; prods_tl: nonterm_prods_tl} + | ProdsProd of {prod: nonterm_prod; prods_tl: nonterm_prods_tl} +and nonterm_reduction = + | Reduction of {prods: nonterm_prods; arrow: Scan.Token.t; code: nonterm_code} +and nonterm_reductions_tl = + | ReductionsTlBarReduction of {bar: Scan.Token.t; reduction: nonterm_reduction; + reductions_tl: nonterm_reductions_tl} | ReductionsTlEpsilon -and reductions = - | ReductionsReduction of {reduction: reduction; reductions_tl: reductions_tl} -and nonterm_type = - | NontermTypeNonterm of {nonterm: Scan.Token.t} - | NontermTypeStart of {start: Scan.Token.t} -and nonterm = - | NontermProds of {nonterm_type: nonterm_type; cident: cident; prec_ref: prec_ref; - cce: Scan.Token.t; prods: prods} - | NontermReductions of {nonterm_type: nonterm_type; cident: cident; of_type: of_type; - prec_ref: prec_ref; cce: Scan.Token.t; reductions: reductions} -and stmt = - | StmtPrec of {prec: prec} - | StmtToken of {token: token} - | StmtNonterm of {nonterm: nonterm} - | StmtCode of {code: code} -and stmts_tl = - | StmtsTl of {line_delim: Scan.Token.t; stmt: stmt; stmts_tl: stmts_tl} +and nonterm_reductions = + | ReductionsReduction of {reduction: nonterm_reduction; reductions_tl: nonterm_reductions_tl} +and nonterm_nonterm_type = + | NontermTypeNonterm of {nonterm_: Scan.Token.t} + | NontermTypeStart of {start_: Scan.Token.t} +and nonterm_nonterm = + | NontermProds of {nonterm_type: nonterm_nonterm_type; cident: nonterm_cident; + prec_ref: nonterm_prec_ref; cce: Scan.Token.t; prods: nonterm_prods} + | NontermReductions of {nonterm_type: nonterm_nonterm_type; cident: nonterm_cident; + of_type: nonterm_of_type; prec_ref: nonterm_prec_ref; cce: Scan.Token.t; + reductions: nonterm_reductions} +and nonterm_stmt = + | StmtPrec of {prec_: nonterm_prec} + | StmtToken of {token_: nonterm_token} + | StmtNonterm of {nonterm_: nonterm_nonterm} + | StmtCode of {code: nonterm_code} +and nonterm_stmts_tl = + | StmtsTl of {line_delim: Scan.Token.t; stmt: nonterm_stmt; stmts_tl: nonterm_stmts_tl} | StmtsTlEpsilon -and stmts = - | Stmts of {stmt: stmt; stmts_tl: stmts_tl} -and hocc = - | Hocc of {hocc: Scan.Token.t; indent: Scan.Token.t; stmts: stmts; dedent: Scan.Token.t} -and eoi = +and nonterm_stmts = + | Stmts of {stmt: nonterm_stmt; stmts_tl: nonterm_stmts_tl} +and nonterm_hocc = + | Hocc of {hocc_: Scan.Token.t; indent: Scan.Token.t; stmts: nonterm_stmts; dedent: Scan.Token.t} +and nonterm_eoi = | Eoi of {eoi: Scan.Token.t} -and matter = - | Matter of {token: Scan.Token.t; matter: matter} +and nonterm_matter = + | Matter of {token_: Scan.Token.t; matter: nonterm_matter} | MatterEpsilon -and hmh = - | Hmh of {prelude: matter; hocc: hocc; postlude: matter; eoi: eoi} -and hmhi = - | Hmhi of {prelude: matter; hocc: Scan.Token.t; postlude: matter; eoi: eoi} +and nonterm_hmh = + | Hmh of {prelude: nonterm_matter; hocc_: nonterm_hocc; postlude: nonterm_matter; + eoi: Scan.Token.t} +and nonterm_hmhi = + | Hmhi of {prelude: nonterm_matter; hocc_: Scan.Token.t; postlude: nonterm_matter; + eoi: Scan.Token.t} (**************************************************************************************************) (* source_of_* functions. *) @@ -236,9 +242,9 @@ and source_of_prec_rels = function | PrecRelsEpsilon -> None and source_of_prec_type = function - | PrecTypeNeutral {neutral} -> token_source neutral - | PrecTypeLeft {left} -> token_source left - | PrecTypeRight {right} -> token_source right + | PrecTypeNeutral {neutral_} -> token_source neutral_ + | PrecTypeLeft {left_} -> token_source left_ + | PrecTypeRight {right_} -> token_source right_ and source_of_prec = function | Prec {prec_type; uident; prec_rels} -> @@ -257,8 +263,8 @@ and source_of_of_type0 = function | OfType0Epsilon -> None and source_of_prec_ref = function - | PrecRefPrecUident {prec; uident} -> - token_source prec + | PrecRefPrecUident {prec_; uident} -> + token_source prec_ |> join_sources (source_of_uident uident) | PrecRefEpsilon -> None @@ -267,8 +273,8 @@ and source_of_token_alias = function | TokenAliasEpsilon -> None and source_of_token = function - | Token {token; cident; token_alias; of_type0; prec_ref} -> - token_source token + | Token {token_; cident; token_alias; of_type0; prec_ref} -> + token_source token_ |> join_sources (source_of_cident cident) |> join_sources (source_of_token_alias token_alias) |> join_sources (source_of_of_type0 of_type0) @@ -309,8 +315,8 @@ and source_of_code_tl = function | CodeTlDelimited {delimited; code_tl} -> source_of_delimited delimited |> join_sources (source_of_code_tl code_tl) - | CodeTlToken {token; code_tl} -> - token_source token + | CodeTlToken {token_; code_tl} -> + token_source token_ |> join_sources (source_of_code_tl code_tl) | CodeTlEpsilon -> None @@ -318,8 +324,8 @@ and source_of_code = function | CodeDelimited {delimited; code_tl} -> source_of_delimited delimited |> join_sources (source_of_code_tl code_tl) - | CodeToken {token; code_tl} -> - token_source token + | CodeToken {token_; code_tl} -> + token_source token_ |> join_sources (source_of_code_tl code_tl) and source_of_prod_param_symbol = function @@ -346,7 +352,7 @@ and source_of_prod_params = function and source_of_prod_pattern = function | ProdPatternParams {prod_params} -> source_of_prod_params prod_params - | ProdPatternEpsilon {epsilon} -> token_source epsilon + | ProdPatternEpsilon {epsilon_} -> token_source epsilon_ and source_of_prod = function | Prod {prod_pattern; prec_ref} -> @@ -387,8 +393,8 @@ and source_of_reductions = function |> join_sources (source_of_reductions_tl reductions_tl) and source_of_nonterm_type = function - | NontermTypeNonterm {nonterm} -> token_source nonterm - | NontermTypeStart {start} -> token_source start + | NontermTypeNonterm {nonterm_} -> token_source nonterm_ + | NontermTypeStart {start_} -> token_source start_ and source_of_nonterm = function | NontermProds {nonterm_type; cident=_; prec_ref=_; cce=_; prods} -> @@ -399,9 +405,9 @@ and source_of_nonterm = function |> join_sources (source_of_reductions reductions) and source_of_stmt = function - | StmtPrec {prec} -> source_of_prec prec - | StmtToken {token} -> source_of_token token - | StmtNonterm {nonterm} -> source_of_nonterm nonterm + | StmtPrec {prec_} -> source_of_prec prec_ + | StmtToken {token_} -> source_of_token token_ + | StmtNonterm {nonterm_} -> source_of_nonterm nonterm_ | StmtCode {code} -> source_of_code code and source_of_stmts_tl = function @@ -417,35 +423,35 @@ and source_of_stmts = function |> join_sources (source_of_stmts_tl stmts_tl) and source_of_hocc = function - | Hocc {hocc; indent=_; stmts=_; dedent} -> - token_source hocc + | Hocc {hocc_; indent=_; stmts=_; dedent} -> + token_source hocc_ |> join_sources (token_source dedent) and source_of_eoi = function | Eoi {eoi} -> token_source eoi and source_of_matter = function - | Matter {token; matter} -> - token_source token + | Matter {token_; matter} -> + token_source token_ |> join_sources (source_of_matter matter) | MatterEpsilon -> None and source_of_hmh = function - | Hmh {prelude; hocc; postlude=_; eoi} -> + | Hmh {prelude; hocc_; postlude=_; eoi} -> source_of_matter prelude - |> join_sources (source_of_hocc hocc) - |> join_sources (source_of_eoi eoi) + |> join_sources (source_of_hocc hocc_) + |> join_sources (token_source eoi) and source_of_hmhi = function - | Hmhi {prelude; hocc; postlude=_; eoi} -> + | Hmhi {prelude; hocc_; postlude=_; eoi} -> source_of_matter prelude - |> join_sources (token_source hocc) - |> join_sources (source_of_eoi eoi) + |> join_sources (token_source hocc_) + |> join_sources (token_source eoi) (**************************************************************************************************) (* fmt_* functions. *) -let fmt_lcurly ~alt ~width formatter = +let rec fmt_lcurly ~alt ~width formatter = match alt with | false -> formatter |> Fmt.fmt "{" | true -> @@ -453,7 +459,7 @@ let fmt_lcurly ~alt ~width formatter = |> Fmt.fmt "{\n" |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) "" -let fmt_semi ~alt ~width formatter = +and fmt_semi ~alt ~width formatter = match alt with | false -> formatter |> Fmt.fmt "; " | true -> @@ -461,7 +467,7 @@ let fmt_semi ~alt ~width formatter = |> Fmt.fmt "\n" |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 4L) "" -let fmt_rcurly ~alt ~width formatter = +and fmt_rcurly ~alt ~width formatter = match alt with | false -> formatter |> Fmt.fmt "}" | true -> @@ -470,7 +476,7 @@ let fmt_rcurly ~alt ~width formatter = |> Fmt.fmt ~pad:" " ~just:Fmt.Left ~width:(width + 2L) "" |> Fmt.fmt "}" -let rec fmt_uident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) uident formatter = +and fmt_uident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) uident formatter = match uident with | Uident {uident} -> formatter @@ -481,7 +487,7 @@ let rec fmt_uident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) uident form and pp_uident uident formatter = fmt_uident uident formatter -let rec fmt_cident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) cident formatter = +and fmt_cident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) cident formatter = match cident with | Cident {cident} -> formatter @@ -512,8 +518,7 @@ and fmt_ident ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) ident formatter and pp_ident ident formatter = fmt_ident ident formatter -and fmt_precs_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs_tl - formatter = +and fmt_precs_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) precs_tl formatter = let width' = width + 4L in match precs_tl with | PrecsTlCommaUident {comma; uident; precs_tl} -> @@ -560,20 +565,20 @@ and pp_prec_rels prec_rels formatter = and fmt_prec_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_type formatter = match prec_type with - | PrecTypeNeutral {neutral} -> + | PrecTypeNeutral {neutral_} -> formatter |> Fmt.fmt "PrecTypeNeutral " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "neutral=" |> Scan.Token.pp neutral + |> Fmt.fmt "neutral_=" |> Scan.Token.pp neutral_ |> fmt_rcurly ~alt ~width - | PrecTypeLeft {left} -> + | PrecTypeLeft {left_} -> formatter |> Fmt.fmt "PrecTypeLeft " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "left=" |> Scan.Token.pp left + |> Fmt.fmt "left_=" |> Scan.Token.pp left_ |> fmt_rcurly ~alt ~width - | PrecTypeRight {right} -> + | PrecTypeRight {right_} -> formatter |> Fmt.fmt "PrecTypeRight " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "right=" |> Scan.Token.pp right + |> Fmt.fmt "right_=" |> Scan.Token.pp right_ |> fmt_rcurly ~alt ~width and pp_prec_type prec_type formatter = fmt_prec_type prec_type formatter @@ -626,10 +631,10 @@ and pp_of_type0 of_type0 formatter = and fmt_prec_ref ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prec_ref formatter = let width' = width + 4L in match prec_ref with - | PrecRefPrecUident {prec; uident} -> + | PrecRefPrecUident {prec_; uident} -> formatter |> Fmt.fmt "PrecRefPrecUident " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "prec=" |> Scan.Token.pp prec + |> Fmt.fmt "prec_=" |> Scan.Token.pp prec_ |> fmt_semi ~alt ~width |> Fmt.fmt "uident=" |> fmt_uident ~alt ~width:width' uident |> fmt_rcurly ~alt ~width @@ -653,10 +658,10 @@ and pp_token_alias token_alias formatter = and fmt_token ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) token formatter = let width' = width + 4L in match token with - | Token {token; cident; token_alias; of_type0; prec_ref} -> + | Token {token_; cident; token_alias; of_type0; prec_ref} -> formatter |> Fmt.fmt "Token " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "token=" |> Scan.Token.pp token + |> Fmt.fmt "token_=" |> Scan.Token.pp token_ |> fmt_semi ~alt ~width |> Fmt.fmt "cident=" |> fmt_cident ~alt ~width:width' cident |> fmt_semi ~alt ~width @@ -801,10 +806,10 @@ and fmt_code_tl ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code_tl format |> fmt_semi ~alt ~width |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl |> fmt_rcurly ~alt ~width - | CodeTlToken {token; code_tl} -> + | CodeTlToken {token_; code_tl} -> formatter |> Fmt.fmt "CodeTlToken " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "token=" |> Scan.Token.pp token + |> Fmt.fmt "token_=" |> Scan.Token.pp token_ |> fmt_semi ~alt ~width |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl |> fmt_rcurly ~alt ~width @@ -823,10 +828,10 @@ and fmt_code ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) code formatter = |> fmt_semi ~alt ~width |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl |> fmt_rcurly ~alt ~width - | CodeToken {token; code_tl} -> + | CodeToken {token_; code_tl} -> formatter |> Fmt.fmt "CodeToken " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "token=" |> Scan.Token.pp token + |> Fmt.fmt "token_=" |> Scan.Token.pp token_ |> fmt_semi ~alt ~width |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl |> fmt_rcurly ~alt ~width @@ -861,7 +866,8 @@ and fmt_prod_param ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_param |> Fmt.fmt "colon=" |> Scan.Token.pp colon |> fmt_semi ~alt ~width |> Fmt.fmt "prod_param_symbol=" |> fmt_prod_param_symbol ~alt ~width:width' prod_param_symbol - |> fmt_rcurly ~alt ~width | ProdParam {prod_param_symbol} -> + |> fmt_rcurly ~alt ~width + | ProdParam {prod_param_symbol} -> formatter |> Fmt.fmt "ProdParam " |> fmt_lcurly ~alt ~width |> Fmt.fmt "prod_param_symbol=" |> fmt_prod_param_symbol ~alt ~width:width' prod_param_symbol @@ -905,10 +911,10 @@ and fmt_prod_pattern ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) prod_patt |> fmt_lcurly ~alt ~width |> Fmt.fmt "prod_params=" |> fmt_prod_params ~alt ~width:width' prod_params |> fmt_rcurly ~alt ~width - | ProdPatternEpsilon {epsilon} -> + | ProdPatternEpsilon {epsilon_} -> formatter |> Fmt.fmt "ProdPatternEpsilon " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "epsilon=" |> Scan.Token.pp epsilon + |> Fmt.fmt "epsilon_=" |> Scan.Token.pp epsilon_ |> fmt_rcurly ~alt ~width and pp_prod_pattern prod_pattern formatter = fmt_prod_pattern prod_pattern formatter @@ -1012,15 +1018,15 @@ and pp_reductions reductions formatter = and fmt_nonterm_type ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) nonterm_type formatter = match nonterm_type with - | NontermTypeNonterm {nonterm} -> + | NontermTypeNonterm {nonterm_} -> formatter |> Fmt.fmt "NontermTypeNonterm " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "nonterm=" |> Scan.Token.pp nonterm + |> Fmt.fmt "nonterm_=" |> Scan.Token.pp nonterm_ |> fmt_rcurly ~alt ~width - | NontermTypeStart {start} -> + | NontermTypeStart {start_} -> formatter |> Fmt.fmt "NontermTypeStart " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "start=" |> Scan.Token.pp start + |> Fmt.fmt "start_=" |> Scan.Token.pp start_ |> fmt_rcurly ~alt ~width and pp_nonterm_type nonterm_type formatter = fmt_nonterm_type nonterm_type formatter @@ -1062,20 +1068,20 @@ and pp_nonterm nonterm formatter = and fmt_stmt ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) stmt formatter = let width' = width + 4L in match stmt with - | StmtPrec {prec} -> + | StmtPrec {prec_} -> formatter |> Fmt.fmt "StmtPrec " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "prec=" |> fmt_prec ~alt ~width:width' prec + |> Fmt.fmt "prec_=" |> fmt_prec ~alt ~width:width' prec_ |> fmt_rcurly ~alt ~width - | StmtToken {token} -> + | StmtToken {token_} -> formatter |> Fmt.fmt "StmtToken " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "token=" |> fmt_token ~alt ~width:width' token + |> Fmt.fmt "token_=" |> fmt_token ~alt ~width:width' token_ |> fmt_rcurly ~alt ~width - | StmtNonterm {nonterm} -> + | StmtNonterm {nonterm_} -> formatter |> Fmt.fmt "StmtNonterm " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "nonterm=" |> fmt_nonterm ~alt ~width:width' nonterm + |> Fmt.fmt "nonterm_=" |> fmt_nonterm ~alt ~width:width' nonterm_ |> fmt_rcurly ~alt ~width | StmtCode {code} -> formatter |> Fmt.fmt "StmtCode " @@ -1118,10 +1124,10 @@ and pp_stmts stmts formatter = and fmt_hocc ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hocc formatter = let width' = width + 4L in match hocc with - | Hocc {hocc; indent; stmts; dedent} -> + | Hocc {hocc_; indent; stmts; dedent} -> formatter |> Fmt.fmt "Hocc " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc + |> Fmt.fmt "hocc_=" |> Scan.Token.pp hocc_ |> fmt_semi ~alt ~width |> Fmt.fmt "indent=" |> Scan.Token.pp indent |> fmt_semi ~alt ~width @@ -1145,10 +1151,10 @@ and pp_eoi eoi formatter = and fmt_matter ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) matter formatter = let width' = width + 4L in match matter with - | Matter {token; matter} -> + | Matter {token_; matter} -> formatter |> Fmt.fmt "Matter " |> fmt_lcurly ~alt ~width - |> Fmt.fmt "token=" |> Scan.Token.pp token + |> Fmt.fmt "token_=" |> Scan.Token.pp token_ |> fmt_semi ~alt ~width |> Fmt.fmt "matter=" |> fmt_matter ~alt ~width:width' matter |> fmt_rcurly ~alt ~width @@ -1160,33 +1166,32 @@ and pp_matter matter formatter = and fmt_hmh ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmh formatter = let width' = width + 4L in match hmh with - | Hmh {prelude; hocc; postlude; eoi} -> + | Hmh {prelude; hocc_; postlude; eoi} -> formatter |> Fmt.fmt "Hmh " |> fmt_lcurly ~alt ~width |> Fmt.fmt "prelude=" |> pp_matter prelude |> fmt_semi ~alt ~width - |> Fmt.fmt "hocc=" |> fmt_hocc ~alt ~width:width' hocc + |> Fmt.fmt "hocc_=" |> fmt_hocc ~alt ~width:width' hocc_ |> fmt_semi ~alt ~width |> Fmt.fmt "postlude=" |> pp_matter postlude |> fmt_semi ~alt ~width - |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi + |> Fmt.fmt "eoi=" |> Scan.Token.pp eoi |> fmt_rcurly ~alt ~width and pp_hmh hmh formatter = fmt_hmh hmh formatter and fmt_hmhi ?(alt=Fmt.alt_default) ?(width=Fmt.width_default) hmhi formatter = - let width' = width + 4L in match hmhi with - | Hmhi {prelude; hocc; postlude; eoi} -> + | Hmhi {prelude; hocc_; postlude; eoi} -> formatter |> Fmt.fmt "Hmhi " |> fmt_lcurly ~alt ~width |> Fmt.fmt "prelude=" |> pp_matter prelude |> fmt_semi ~alt ~width - |> Fmt.fmt "hocc=" |> Scan.Token.pp hocc + |> Fmt.fmt "hocc_=" |> Scan.Token.pp hocc_ |> fmt_semi ~alt ~width |> Fmt.fmt "postlude=" |> pp_matter postlude |> fmt_semi ~alt ~width - |> Fmt.fmt "eoi=" |> fmt_eoi ~alt ~width:width' eoi + |> Fmt.fmt "eoi=" |> Scan.Token.pp eoi |> fmt_rcurly ~alt ~width and pp_hmhi hmhi formatter = fmt_hmhi hmhi formatter @@ -1215,8 +1220,44 @@ let rec next ?(all=false) spine ({scanner; errs} as ctx) = |> Fmt.fmt " " |> (List.pp String.pp) (List.rev spine) |> Fmt.fmt " " |> pp_ctx ctx |> ignore in - let errs' = List.fold (Scan.Token.malformations tok) ~init:errs ~f:(fun accum mal -> - Error.init_mal mal :: accum) in + let malformations = Scan.Token.malformations tok in + let tok, errs' = match malformations with + | [] -> tok, errs + | mal :: [] -> begin + (* Try to pass e.g. 42L through as a u64 token to support OCaml syntax. *) + let u64_opt = match Hmc.Scan.AbstractToken.Rendition.Malformation.description mal with + | "Invalid numerical constant" -> begin + let source = Scan.Token.source tok in + Hmc.Source.Slice.to_string source + |> String.chop_suffix ~suffix:"L" + |> (fun s_opt -> + match s_opt with + | None -> None + | Some s -> Stdlib.Int64.of_string_opt s + ) + end + | _ -> None + in + match u64_opt with + | Some x -> begin + let rendition = Hmc.Scan.AbstractToken.Rendition.Constant x in + let ctok = Hmc.Scan.ConcreteToken.{ + atok=Hmc.Scan.AbstractToken.Tok_u64 rendition; + source=Scan.Token.source tok + } in + let tok = Scan.Token.HmcToken ctok in + tok, errs + end + | None -> begin + let errs' = Error.init_mal mal :: errs in + tok, errs' + end + end + | _ -> begin + let errs' = List.fold malformations ~init:errs ~f:(fun accum mal -> + Error.init_mal mal :: accum) in + tok, errs' + end in let ctx' = {scanner=scanner'; errs=errs'} in match all, tok with | _, HmcToken {atok=Tok_whitespace; _} @@ -1321,12 +1362,12 @@ and prec_type spine ctx = let spine = "prec_type" :: spine in let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_neutral; _} as neutral -> - reduce spine ctx' fmt_prec_type (PrecTypeNeutral {neutral}) - | HoccToken {atok=Scan.AbstractToken.Tok_left; _} as left -> - reduce spine ctx' fmt_prec_type (PrecTypeLeft {left}) - | HoccToken {atok=Scan.AbstractToken.Tok_right; _} as right -> - reduce spine ctx' fmt_prec_type (PrecTypeRight {right}) + | HoccToken {atok=Scan.AbstractToken.Tok_neutral; _} as neutral_ -> + reduce spine ctx' fmt_prec_type (PrecTypeNeutral {neutral_}) + | HoccToken {atok=Scan.AbstractToken.Tok_left; _} as left_ -> + reduce spine ctx' fmt_prec_type (PrecTypeLeft {left_}) + | HoccToken {atok=Scan.AbstractToken.Tok_right; _} as right_ -> + reduce spine ctx' fmt_prec_type (PrecTypeRight {right_}) | _ -> err_token tok "Expected precedence type" ctx, None and prec spine ctx = @@ -1374,9 +1415,9 @@ and prec_ref spine ctx = let spine = "prec_ref" :: spine in let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_prec; _} as prec -> + | HoccToken {atok=Scan.AbstractToken.Tok_prec; _} as prec_ -> map ~child:uident ~f:(fun uident -> - PrecRefPrecUident {prec; uident} + PrecRefPrecUident {prec_; uident} ) ~fmt_child:fmt_prec_ref spine ctx' | _ -> reduce spine ctx fmt_prec_ref PrecRefEpsilon @@ -1391,12 +1432,12 @@ and token_alias spine ctx = and token spine ctx = let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_token; _} as token -> + | HoccToken {atok=Scan.AbstractToken.Tok_token; _} as token_ -> mapr ~child:cident ~f:(fun spine ctx' cident -> mapr ~child:token_alias ~f:(fun spine ctx' token_alias -> mapr ~child:of_type0 ~f:(fun spine ctx' of_type0 -> map ~child:prec_ref ~f:(fun prec_ref -> - Token {token; cident; token_alias; of_type0; prec_ref} + Token {token_; cident; token_alias; of_type0; prec_ref} ) ~fmt_child:fmt_token spine ctx' ) spine ctx' ) spine ctx' @@ -1552,9 +1593,9 @@ and code_tl spine ctx = |Tok_rarray|Tok_rcurly |Tok_line_delim|Tok_semi|Tok_bar); _} -> reduce spine ctx fmt_code_tl CodeTlEpsilon - | HmcToken _ as token -> + | HmcToken _ as token_ -> map ~child:code_tl ~f:(fun code_tl -> - CodeTlToken {token; code_tl} + CodeTlToken {token_; code_tl} ) ~fmt_child:fmt_code_tl spine ctx' | _ -> reduce spine ctx fmt_code_tl CodeTlEpsilon @@ -1573,9 +1614,9 @@ and code spine ctx = |Tok_rarray|Tok_rcurly |Tok_line_delim|Tok_semi|Tok_bar); _} -> err_token tok "Expected Hemlock code" ctx, None - | HmcToken _ as token -> + | HmcToken _ as token_ -> map ~child:code_tl ~f:(fun code_tl -> - CodeToken {token; code_tl} + CodeToken {token_; code_tl} ) ~fmt_child:fmt_code spine ctx' | _ -> err_token tok "Expected Hemlock code" ctx, None @@ -1636,8 +1677,8 @@ and prod_pattern spine ctx = let spine = "prod_pattern" :: spine in let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_epsilon; _} as epsilon -> - reduce spine ctx' fmt_prod_pattern (ProdPatternEpsilon {epsilon}) + | HoccToken {atok=Scan.AbstractToken.Tok_epsilon; _} as epsilon_ -> + reduce spine ctx' fmt_prod_pattern (ProdPatternEpsilon {epsilon_}) | _ -> map ~child:prod_params ~f:(fun prod_params -> ProdPatternParams {prod_params} @@ -1721,10 +1762,10 @@ and nonterm_type spine ctx = let spine = "nonterm_type" :: spine in let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_nonterm; _} as nonterm -> - reduce spine ctx' fmt_nonterm_type (NontermTypeNonterm {nonterm}) - | HoccToken {atok=Scan.AbstractToken.Tok_start; _} as start -> - reduce spine ctx' fmt_nonterm_type (NontermTypeStart {start}) + | HoccToken {atok=Scan.AbstractToken.Tok_nonterm; _} as nonterm_ -> + reduce spine ctx' fmt_nonterm_type (NontermTypeNonterm {nonterm_}) + | HoccToken {atok=Scan.AbstractToken.Tok_start; _} as start_ -> + reduce spine ctx' fmt_nonterm_type (NontermTypeStart {start_}) | _ -> err_token tok "Expected 'nonterm'/'start'" ctx, None and nonterm spine ctx = @@ -1733,7 +1774,7 @@ and nonterm spine ctx = let spine = "cce" :: spine in let ctx', tok = next spine ctx in match tok with - | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_colon_op "::="; _} -> ctx', Some tok + | HoccToken {atok=Scan.AbstractToken.Tok_colon_colon_eq; _} -> ctx', Some tok | _ -> err_token tok "Expected '::='" ctx, None end in mapr ~child:nonterm_type ~f:(fun spine ctx' nonterm_type -> @@ -1766,11 +1807,11 @@ and stmt spine ctx = let _ctx', tok = next spine ctx in match tok with | HoccToken {atok=Scan.AbstractToken.(Tok_neutral|Tok_left|Tok_right); _} -> - map ~child:prec ~f:(fun prec -> StmtPrec {prec}) ~fmt_child:fmt_stmt spine ctx + map ~child:prec ~f:(fun prec_ -> StmtPrec {prec_}) ~fmt_child:fmt_stmt spine ctx | HoccToken {atok=Scan.AbstractToken.Tok_token; _} -> - map ~child:token ~f:(fun token -> StmtToken {token}) ~fmt_child:fmt_stmt spine ctx + map ~child:token ~f:(fun token_ -> StmtToken {token_}) ~fmt_child:fmt_stmt spine ctx | HoccToken {atok=Scan.AbstractToken.(Tok_nonterm|Tok_start); _} -> - map ~child:nonterm ~f:(fun nonterm -> StmtNonterm {nonterm}) ~fmt_child:fmt_stmt spine ctx + map ~child:nonterm ~f:(fun nonterm_ -> StmtNonterm {nonterm_}) ~fmt_child:fmt_stmt spine ctx | _ -> map ~child:code ~f:(fun code -> StmtCode {code}) ~fmt_child:fmt_stmt spine ctx and stmts_tl spine ctx = @@ -1805,11 +1846,11 @@ and hocc spine ctx = let spine = "hocc" :: spine in let ctx', tok = next spine ctx in match tok with - | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc -> + | HoccToken {atok=Scan.AbstractToken.Tok_hocc; _} as hocc_ -> mapr ~child:indent ~f:(fun spine ctx' indent -> mapr ~child:stmts ~f:(fun spine ctx' stmts -> map ~child:dedent ~f:(fun dedent -> - Hocc {hocc; indent; stmts; dedent} + Hocc {hocc_; indent; stmts; dedent} ) ~fmt_child:fmt_hocc spine ctx' ) spine ctx' ) spine ctx' @@ -1832,7 +1873,7 @@ and matter spine ctx = | HmcToken {atok=Hmc.Scan.AbstractToken.Tok_end_of_input; _} -> ctx, MatterEpsilon | HmcToken _ -> begin let ctx', matter_rchild = f ctx' in - ctx', Matter {token=tok; matter=matter_rchild} + ctx', Matter {token_=tok; matter=matter_rchild} end end in let ctx', matter = f ctx in @@ -1843,10 +1884,10 @@ and hmh scanner = let ctx = {scanner; errs=[]} in let ctx', hmh_opt = mapr ~child:matter ~f:(fun spine ctx' prelude -> - mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:hocc ~f:(fun spine ctx' hocc_ -> mapr ~child:matter ~f:(fun spine ctx' postlude -> - map ~child:eoi ~f:(fun eoi -> - Hmh {prelude; hocc; postlude; eoi} + map ~child:eoi ~f:(fun (Eoi {eoi}) -> + Hmh {prelude; hocc_; postlude; eoi} ) ~fmt_child:fmt_hmh spine ctx' ) spine ctx' ) spine ctx' @@ -1869,10 +1910,10 @@ and hmhi scanner = let ctx = {scanner; errs=[]} in let ctx', hmh_opt = mapr ~child:matter ~f:(fun spine ctx' prelude -> - mapr ~child:hocc ~f:(fun spine ctx' hocc -> + mapr ~child:hocc ~f:(fun spine ctx' hocc_ -> mapr ~child:matter ~f:(fun spine ctx' postlude -> - map ~child:eoi ~f:(fun eoi -> - Hmhi {prelude; hocc; postlude; eoi} + map ~child:eoi ~f:(fun (Eoi {eoi}) -> + Hmhi {prelude; hocc_; postlude; eoi} ) ~fmt_child:fmt_hmhi spine ctx' ) spine ctx' ) spine ctx' @@ -1882,3 +1923,232 @@ and hmhi scanner = | {errs=(_ :: _); _}, _ | _, None -> ctx'.scanner, Error ctx'.errs | {errs=[]; _}, Some hmh -> ctx'.scanner, Ok hmh + +(**************************************************************************************************) +(* Miscellaneous helper functions. *) + +let min_comment_indentation_of_hocc_block = function + | Hocc {indent; _} -> + Scan.Token.source indent + |> Hmc.Source.Slice.base + |> Hmc.Source.Cursor.pos + |> Text.Pos.col + +let base_of_code code = + let of_token token = + let open Scan.Token in + let source = match token with + | HmcToken ctok -> ctok |> Hmc.Scan.ConcreteToken.source + | HoccToken ctok -> ctok |> Scan.ConcreteToken.source + in + Hmc.Source.Slice.base source + in + let rec of_delimited = function + | DelimitedBlock {indent=token; _} + | DelimitedParen {lparen=token; _} + | DelimitedCapture {lcapture=token; _} + | DelimitedList {lbrack=token; _} + | DelimitedArray {larray=token; _} + | DelimitedModule {lcurly=token; _} -> of_token token + and of_code = function + | CodeDelimited {delimited; _} -> of_delimited delimited + | CodeToken {token_; _} -> of_token token_ + in + of_code code + +let last_token_of_code hocc_block code = + let min_comment_indentation = min_comment_indentation_of_hocc_block hocc_block in + let rec of_codes_tl = function + | CodesTlSepCode {code; codes_tl; _} -> begin + of_codes_tl codes_tl + |> Option.some_or_thunk ~f:(fun () -> Some (of_code code)) + end + | CodesTlEpsilon -> None + and of_codes = function + | Codes {code; codes_tl} -> begin + of_codes_tl codes_tl + |> Option.value_or_thunk ~f:(fun () -> of_code code) + end + and of_delimited = function + | DelimitedBlock {codes; dedent; _} -> begin + of_codes codes + |> Option.some_or_thunk ~f:(fun () -> Some dedent) + |> Option.value_hlt + end + | DelimitedParen {rparen=token; _} + | DelimitedCapture {rcapture=token; _} + | DelimitedList {rbrack=token; _} + | DelimitedArray {rarray=token; _} + | DelimitedModule {rcurly=token; _} -> token + and of_code_tl = function + | CodeTlDelimited {delimited; code_tl} -> + of_code_tl code_tl + |> Option.some_or_thunk ~f:(fun () -> Some (of_delimited delimited)) + | CodeTlToken {token_; code_tl} -> begin + of_code_tl code_tl + |> Option.some_or_thunk ~f:(fun () -> + (* Exclude comments less indented than `hocc` block from the tail. *) + match token_ with + | HmcToken ctok -> begin + match Hmc.Scan.ConcreteToken.atok ctok with + | Tok_hash_comment + | Tok_paren_comment _ -> begin + let ctok_indentation = + ctok + |> Hmc.Scan.ConcreteToken.source + |> Hmc.Source.Slice.base + |> Hmc.Source.Cursor.pos + |> Text.Pos.col + in + match ctok_indentation >= min_comment_indentation with + | true -> Some token_ + | false -> None + end + | _ -> Some token_ + end + | HoccToken _ -> Some token_ + ) + end + | CodeTlEpsilon -> None + and of_code = function + | CodeDelimited {delimited; code_tl} -> + of_code_tl code_tl |> Option.some_or_thunk ~f:(fun () -> Some (of_delimited delimited)) + | CodeToken {token_; code_tl} -> + of_code_tl code_tl |> Option.some_or_thunk ~f:(fun () -> Some token_) + in + of_code code + |> Option.value_hlt + +let past_of_code hocc_block code = + let of_token token = + let open Scan.Token in + let source = match token with + | HmcToken ctok -> ctok |> Hmc.Scan.ConcreteToken.source + | HoccToken ctok -> ctok |> Scan.ConcreteToken.source + in + Hmc.Source.Slice.past source + in + last_token_of_code hocc_block code + |> of_token + +let source_of_code hocc_block code = + let base = base_of_code code in + let past = past_of_code hocc_block code in + Hmc.Source.Slice.of_cursors ~base ~past + +let indentation_of_code hocc_block code = + let min_comment_indentation = min_comment_indentation_of_hocc_block hocc_block in + match code with + | CodeDelimited _ -> min_comment_indentation + 4L + | CodeToken _ -> min_comment_indentation + +(* Find the base cursor for the postlude that preserves comments/whitespace that fall outside the + * `hocc` block. *) +let postlude_base_of_hocc (Hocc {stmts=Stmts {stmt; stmts_tl}; _} as hocc_block) = + let rec of_uident = function + | Uident {uident} -> uident + and of_cident = function + | Cident {cident} -> cident + and of_precs_tl = function + | PrecsTlCommaUident {uident; precs_tl; _} -> + Some ( + of_precs_tl precs_tl + |> Option.value_or_thunk ~f:(fun () -> of_uident uident) + ) + | PrecsTlEpsilon -> None + and of_precs = function + | Precs {uident; precs_tl} -> begin + of_precs_tl precs_tl + |> Option.value_or_thunk ~f:(fun () -> of_uident uident) + end + and of_prec_rels = function + | PrecRelsLtPrecs {precs; _} -> Some (of_precs precs) + | PrecRelsEpsilon -> None + and of_of_type = function + | OfType {type_type; _} -> of_uident type_type + and of_of_type0 = function + | OfType0OfType {of_type} -> Some (of_of_type of_type) + | OfType0Epsilon -> None + and of_prec_ref = function + | PrecRefPrecUident {uident; _} -> Some (of_uident uident) + | PrecRefEpsilon -> None + and of_token_alias = function + | TokenAlias {alias} -> Some alias + | TokenAliasEpsilon -> None + and of_prod_param_symbol = function + | ProdParamSymbolCident {cident} -> of_cident cident + | ProdParamSymbolAlias {alias} -> alias + and of_prod_param = function + | ProdParamBinding {prod_param_symbol; _} + | ProdParam {prod_param_symbol} -> of_prod_param_symbol prod_param_symbol + and of_prod_params_tl = function + | ProdParamsTlProdParam {prod_param; prod_params_tl} -> begin + of_prod_params_tl prod_params_tl + |> Option.some_or_thunk ~f:(fun () -> Some (of_prod_param prod_param)) + end + | ProdParamsTlEpsilon -> None + and of_prod_params = function + | ProdParamsProdParam {prod_param; prod_params_tl} -> begin + of_prod_params_tl prod_params_tl + |> Option.value_or_thunk ~f:(fun () -> of_prod_param prod_param) + end + and of_prod_pattern = function + | ProdPatternParams {prod_params} -> of_prod_params prod_params + | ProdPatternEpsilon {epsilon_} -> epsilon_ + and of_prod = function + | Prod {prod_pattern; prec_ref} -> begin + of_prec_ref prec_ref + |> Option.value_or_thunk ~f:(fun () -> of_prod_pattern prod_pattern) + end + and of_prods_tl = function + | ProdsTlBarProd {prod; prods_tl; _} -> begin + of_prods_tl prods_tl + |> Option.some_or_thunk ~f:(fun () -> Some (of_prod prod)) + end + | ProdsTlEpsilon -> None + and of_prods = function + | ProdsBarProd {prod; prods_tl; _} + | ProdsProd {prod; prods_tl} -> begin + of_prods_tl prods_tl + |> Option.value_or_thunk ~f:(fun () -> of_prod prod) + end + and of_reduction = function + | Reduction {code; _} -> last_token_of_code hocc_block code + and of_reductions_tl = function + | ReductionsTlBarReduction {reduction; reductions_tl; _} -> begin + of_reductions_tl reductions_tl + |> Option.some_or_thunk ~f:(fun () -> Some (of_reduction reduction)) + end + | ReductionsTlEpsilon -> None + and of_reductions = function + | ReductionsReduction {reduction; reductions_tl} -> begin + of_reductions_tl reductions_tl + |> Option.value_or_thunk ~f:(fun () -> of_reduction reduction) + end + and of_nonterm = function + | NontermProds {prods; _} -> of_prods prods + | NontermReductions {reductions; _} -> of_reductions reductions + and of_stmt = function + | StmtPrec {prec_=Prec {uident; prec_rels; _}} -> begin + of_prec_rels prec_rels + |> Option.value_or_thunk ~f:(fun () -> of_uident uident) + end + | StmtToken {token_=Token {cident; token_alias; of_type0; prec_ref; _}} -> begin + of_prec_ref prec_ref + |> Option.some_or_thunk ~f:(fun () -> of_of_type0 of_type0) + |> Option.some_or_thunk ~f:(fun () -> of_token_alias token_alias) + |> Option.value_or_thunk ~f:(fun () -> of_cident cident) + end + | StmtNonterm {nonterm_} -> of_nonterm nonterm_ + | StmtCode {code} -> last_token_of_code hocc_block code + and of_stmts_tl = function + | StmtsTl {stmt; stmts_tl; _} -> begin + (of_stmts_tl stmts_tl) + |> Option.some_or_thunk ~f:(fun () -> Some (of_stmt stmt)) + end + | StmtsTlEpsilon -> None + in + of_stmts_tl stmts_tl + |> Option.value_or_thunk ~f:(fun () -> of_stmt stmt) + |> Scan.Token.source + |> Hmc.Source.Slice.past diff --git a/bootstrap/bin/hocc/prec.ml b/bootstrap/bin/hocc/prec.ml index 1c78854d3..4a0d3f44d 100644 --- a/bootstrap/bin/hocc/prec.ml +++ b/bootstrap/bin/hocc/prec.ml @@ -7,7 +7,7 @@ type t = { name: string; assoc: Assoc.t option; doms: (Index.t, Index.cmper_witness) Ordset.t; - stmt: Parse.prec; + stmt: Parse.nonterm_prec; } let pp {index; name; assoc; doms; stmt} formatter = diff --git a/bootstrap/bin/hocc/prec.mli b/bootstrap/bin/hocc/prec.mli index 38ec97cf1..bc9192a7c 100644 --- a/bootstrap/bin/hocc/prec.mli +++ b/bootstrap/bin/hocc/prec.mli @@ -19,7 +19,7 @@ type t = { doms: (Index.t, Index.cmper_witness) Ordset.t; (** Set of precedences which dominate this precedence. *) - stmt: Parse.prec; + stmt: Parse.nonterm_prec; (** Declaration AST. *) } @@ -32,5 +32,5 @@ val src_fmt: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) (** Formatter which outputs precedence in hocc syntax. *) val init: index:Index.t -> name:string -> assoc:(Assoc.t option) - -> doms:(Index.t, Index.cmper_witness) Ordset.t -> stmt:Parse.prec -> t + -> doms:(Index.t, Index.cmper_witness) Ordset.t -> stmt:Parse.nonterm_prec -> t (** Used only by [Precs.init]. *) diff --git a/bootstrap/bin/hocc/precs.mli b/bootstrap/bin/hocc/precs.mli index dd33b6166..c53907a08 100644 --- a/bootstrap/bin/hocc/precs.mli +++ b/bootstrap/bin/hocc/precs.mli @@ -11,7 +11,7 @@ val empty: t (** [empty] returns an empty set of precedences. *) val insert: name:string -> assoc:(Assoc.t option) - -> doms:(Prec.Index.t, Prec.Index.cmper_witness) Ordset.t -> stmt:Parse.prec -> t -> t + -> doms:(Prec.Index.t, Prec.Index.cmper_witness) Ordset.t -> stmt:Parse.nonterm_prec -> t -> t (** [insert ~name ~assoc ~doms ~stmt t] creates a [Prec.t] with unique index and returns a new [t] with the production inserted. *) diff --git a/bootstrap/bin/hocc/prod.ml b/bootstrap/bin/hocc/prod.ml index 54b7749f1..ae3028a61 100644 --- a/bootstrap/bin/hocc/prod.ml +++ b/bootstrap/bin/hocc/prod.ml @@ -8,8 +8,8 @@ module T = struct lhs_index: SymbolIndex.t; rhs_indexes: SymbolIndex.t array; prec: Prec.t option; - stmt: Parse.prod option; - reduction: Reduction.t; + stmt: Parse.nonterm_prod option; + callback: Callback.t; } let hash_fold {index; _} state = @@ -18,21 +18,21 @@ module T = struct let cmp {index=index0; _} {index=index1; _} = Index.cmp index0 index1 - let pp {index; lhs_index; rhs_indexes; prec; stmt; reduction} formatter = + let pp {index; lhs_index; rhs_indexes; prec; stmt; callback} formatter = formatter |> Fmt.fmt "{index=" |> Index.pp index |> Fmt.fmt "; lhs_index=" |> SymbolIndex.pp lhs_index |> Fmt.fmt "; rhs_indexes=" |> (Array.pp SymbolIndex.pp) rhs_indexes |> Fmt.fmt "; prec=" |> (Option.pp Prec.pp) prec |> Fmt.fmt "; stmt=" |> (Option.pp Parse.fmt_prod) stmt - |> Fmt.fmt "; reduction=" |> Reduction.pp reduction + |> Fmt.fmt "; callback=" |> Callback.pp callback |> Fmt.fmt "}" end include T include Identifiable.Make(T) -let init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction = - {index; lhs_index; rhs_indexes; prec; stmt; reduction} +let init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~callback = + {index; lhs_index; rhs_indexes; prec; stmt; callback} let is_synthetic {stmt; _} = Option.is_none stmt diff --git a/bootstrap/bin/hocc/prod.mli b/bootstrap/bin/hocc/prod.mli index 9b0036633..6dbb64732 100644 --- a/bootstrap/bin/hocc/prod.mli +++ b/bootstrap/bin/hocc/prod.mli @@ -19,17 +19,17 @@ type t = { is [Some p] regardless of whether precedence is specified for just this prod versus all of the nonterm (LHS symbol) prods. *) - stmt: Parse.prod option; + stmt: Parse.nonterm_prod option; (** Declaration AST. *) - reduction: Reduction.t; - (** Reduction code. *) + callback: Callback.t; + (** Reduction callback code. *) } include IdentifiableIntf.S with type t := t val init: index:Index.t -> lhs_index:SymbolIndex.t -> rhs_indexes:SymbolIndex.t array - -> prec:Prec.t option -> stmt:Parse.prod option -> reduction:Reduction.t -> t + -> prec:Prec.t option -> stmt:Parse.nonterm_prod option -> callback:Callback.t -> t (** Used only by [Prods.init]. *) val is_synthetic: t -> bool diff --git a/bootstrap/bin/hocc/prods.ml b/bootstrap/bin/hocc/prods.ml index ba11d1894..ba2f4fcf1 100644 --- a/bootstrap/bin/hocc/prods.ml +++ b/bootstrap/bin/hocc/prods.ml @@ -7,9 +7,9 @@ let empty = Ordmap.empty (module Prod.Index) let length = Ordmap.length -let insert ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction t = +let insert ~lhs_index ~rhs_indexes ~prec ~stmt ~callback t = let index = length t in - let prod = Prod.init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction in + let prod = Prod.init ~index ~lhs_index ~rhs_indexes ~prec ~stmt ~callback in prod, Ordmap.insert_hlt ~k:index ~v:prod t let prod_of_prod_index = Ordmap.get_hlt diff --git a/bootstrap/bin/hocc/prods.mli b/bootstrap/bin/hocc/prods.mli index e52e9c3f0..c1143ff03 100644 --- a/bootstrap/bin/hocc/prods.mli +++ b/bootstrap/bin/hocc/prods.mli @@ -10,8 +10,8 @@ val empty: t (** [empty] returns an empty set of productions. *) val insert: lhs_index:Symbol.Index.t -> rhs_indexes:Symbol.Index.t array -> prec:Prec.t option - -> stmt:Parse.prod option -> reduction:Reduction.t -> t -> Prod.t * t -(** [insert ~lhs_index ~rhs_indexes ~prec ~stmt ~reduction t] creates a [Prod.t] with unique index + -> stmt:Parse.nonterm_prod option -> callback:Callback.t -> t -> Prod.t * t +(** [insert ~lhs_index ~rhs_indexes ~prec ~stmt ~callback t] creates a [Prod.t] with unique index and returns both the production and a new [t] with the production inserted. *) val length: t -> uns diff --git a/bootstrap/bin/hocc/qualifiedType.ml b/bootstrap/bin/hocc/qualifiedType.ml index 572ec1d4a..a11f25d47 100644 --- a/bootstrap/bin/hocc/qualifiedType.ml +++ b/bootstrap/bin/hocc/qualifiedType.ml @@ -2,57 +2,74 @@ open Basis open! Basis.Rudiments module T = struct - type t = - | Synthetic - | Implicit - | Explicit of { - module_: string; - type_: string; - } - - let hash_fold t state = - match t with - | Synthetic -> state |> Uns.hash_fold 0L - | Implicit -> state |> Uns.hash_fold 1L - | Explicit {module_; type_} -> begin - state - |> Uns.hash_fold 2L - |> String.hash_fold module_ - |> String.hash_fold type_ - end + type explicit = { + module_: string; + type_: string; + } + type t = { + synthetic: bool; + explicit_opt: explicit option; + } + + let hash_fold {synthetic; explicit_opt} state = + state + |> Bool.hash_fold synthetic + |> (fun state -> + match explicit_opt with + | None -> state |> Uns.hash_fold 0L + | Some {module_; type_} -> begin + state + |> Uns.hash_fold 1L + |> String.hash_fold module_ + |> String.hash_fold type_ + end + ) - let cmp t0 t1 = + let cmp {synthetic=s0; explicit_opt=e0} {synthetic=s1; explicit_opt=e1} = let open Cmp in - match t0, t1 with - | Synthetic, Synthetic -> Eq - | Synthetic, (Implicit|Explicit _) -> Lt - | Implicit, Synthetic -> Gt - | Implicit, Implicit -> Eq - | Implicit, Explicit _ -> Lt - | Explicit _, (Synthetic|Implicit) -> Gt - | Explicit {module_=m0; type_=t0}, Explicit {module_=m1; type_=t1} -> begin - match String.cmp m0 m1 with - | Lt -> Lt - | Eq -> String.cmp t0 t1 - | Gt -> Gt + match Bool.cmp s0 s1 with + | Lt -> Lt + | Eq -> begin + match e0, e1 with + | None, None -> Eq + | None, Some _ -> Lt + | Some {module_=m0; type_=t0}, Some {module_=m1; type_=t1} -> begin + match String.cmp m0 m1 with + | Lt -> Lt + | Eq -> String.cmp t0 t1 + | Gt -> Gt + end + | Some _, None -> Gt end + | Gt -> Gt - let pp t formatter = - match t with - | Synthetic -> formatter |> Fmt.fmt "Synthetic" - | Implicit -> formatter |> Fmt.fmt "Implicit" - | Explicit {module_; type_} -> + let pp {synthetic; explicit_opt} formatter = + let pp_explicit {module_; type_} formatter = begin formatter - |> Fmt.fmt "Explicit {module_=" |> String.pp module_ + |> Fmt.fmt "{module_=" |> String.pp module_ |> Fmt.fmt "; type_=" |> String.pp type_ |> Fmt.fmt "}" + end in + formatter + |> Fmt.fmt "{synthetic=" |> Bool.pp synthetic + |> Fmt.fmt "; explicit_opt=" |> Option.pp pp_explicit explicit_opt + |> Fmt.fmt "}" end include T include Identifiable.Make(T) -let synthetic = Synthetic +let synthetic_implicit = + {synthetic=true; explicit_opt=None} + +let implicit = + {synthetic=false; explicit_opt=None} + +let synthetic_explicit ~module_ ~type_ = + {synthetic=true; explicit_opt=Some {module_; type_}} -let implicit = Implicit +let explicit ~module_ ~type_ = + {synthetic=false; explicit_opt=Some {module_; type_}} -let init ~module_ ~type_ = - Explicit {module_; type_} +let synthetic_wrapper {synthetic; explicit_opt} = + assert (not synthetic); + {synthetic=true; explicit_opt} diff --git a/bootstrap/bin/hocc/qualifiedType.mli b/bootstrap/bin/hocc/qualifiedType.mli index 1c6f106fb..6c53de8b3 100644 --- a/bootstrap/bin/hocc/qualifiedType.mli +++ b/bootstrap/bin/hocc/qualifiedType.mli @@ -3,21 +3,30 @@ open Basis open! Basis.Rudiments -type t = - | Synthetic (** Synthetic symbol. *) - | Implicit (** Unspecified type, e.g. simple [token SOME_TOKEN] or reductionless production. *) - | Explicit of { - module_: string; - type_: string; - } (** Symbol with explicitly specified type. *) +type explicit = { + module_: string; + type_: string; +} (** Explicit qualified type. *) + +type t = { + synthetic: bool; (** Synthetic symbol if true. *) + explicit_opt: explicit option; (** Some explicit qualified type, or None (unspecified type, e.g. + simple [token SOME_TOKEN] or reductionless production). *) +} include IdentifiableIntf.S with type t := t -val synthetic: t -(** [synthetic] returns [Synthetic]. *) +val synthetic_implicit: t +(** [synthetic_implicit] returns synthetic implicit qualified type. *) val implicit: t -(** [implicit] returns [Implicit]. *) +(** [implicit] returns non-synthetic implicit qualified type. *) + +val synthetic_explicit: module_:string -> type_:string -> t +(** [synthetic_explicit ~module_ ~type_] returns synthetic explicit qualified type. *) + +val explicit: module_:string -> type_:string -> t +(** [explicit ~module_ ~type_] returns non-synthetic explicit qualified type. *) -val init: module_:string -> type_:string -> t -(** [init ~module_ ~type_] returns [Explicit {module_; type_}]. *) +val synthetic_wrapper: t -> t +(** [synthetic_wrapper t] returns a synthetic wrapper of non-synthetic [t]. *) diff --git a/bootstrap/bin/hocc/reduction.mli b/bootstrap/bin/hocc/reduction.mli deleted file mode 100644 index 9acced53c..000000000 --- a/bootstrap/bin/hocc/reduction.mli +++ /dev/null @@ -1,64 +0,0 @@ -(** Reduction code associated with a production. Conceptually a reduction is simply a block of code, - but there is quite a bit of hair related to binding parameters to production symbols. *) - -open Basis -open Basis.Rudiments - -(** Reduction parameter. *) -module Param : sig - type t = { - binding: string option; - (** Optional binding name for reduction code. Generated code must specify a binding for each RHS - symbol it needs to access. *) - - symbol_name: string; - (** Symbol name corresponding to a [start]/[nonterm] or [token] declaration. *) - - qtype: QualifiedType.t; - (** Qualified type of parameter, e.g. [Explicit {module_:"SomeToken"; type_:"t"}]. *) - - prod_param: Parse.prod_param option; - (** Declaration AST. *) - } - - include IdentifiableIntf.S with type t := t - - val init: binding:string option -> symbol_name:string -> qtype:QualifiedType.t - -> prod_param:Parse.prod_param option -> t -end - -(** Ordered container of reduction parameters. *) -module Params : sig - type t - type elm = Param.t - - include IdentifiableIntf.S with type t := t - include ContainerIntf.SMonoArray with type t := t with type elm := elm - include ContainerIntf.SMonoIndex with type t := t with type elm := elm - - val init: Io.t -> Param.t array -> Io.t * t - val length: t -> uns - val range: t -> range - val get: uns -> t -> Param.t - val map: f:(Param.t -> 'a) -> t -> 'a array -end - -module Index = Uns -type t = { - index: Index.t; - (** Unique reduction index. *) - - lhs: QualifiedType.t; - (** Qualified type of LHS. *) - - rhs: Params.t; - (** RHS parameters. *) - - code: Parse.code option; - (** Optional embedded code to be invoked by generated parser. *) -} - -include IdentifiableIntf.S with type t := t - -val init: index:Index.t -> lhs:QualifiedType.t -> rhs:Params.t -> code:Parse.code option -> t -(** Used only by [Reductions.init]. *) diff --git a/bootstrap/bin/hocc/reductions.ml b/bootstrap/bin/hocc/reductions.ml deleted file mode 100644 index 3cef368f0..000000000 --- a/bootstrap/bin/hocc/reductions.ml +++ /dev/null @@ -1,16 +0,0 @@ -open Basis -open! Basis.Rudiments - -type t = (Reduction.Index.t, Reduction.t, Reduction.Index.cmper_witness) Ordmap.t - -let empty = Ordmap.empty (module Reduction.Index) - -let length = Ordmap.length - -let insert ~lhs ~rhs ~code t = - let index = length t in - let reduction = Reduction.init ~index ~lhs ~rhs ~code in - reduction, Ordmap.insert_hlt ~k:index ~v:reduction t - -let fold ~init ~f t = - Ordmap.fold ~init ~f:(fun accum (_, reduction) -> f accum reduction) t diff --git a/bootstrap/bin/hocc/reductions.mli b/bootstrap/bin/hocc/reductions.mli deleted file mode 100644 index 571d4283d..000000000 --- a/bootstrap/bin/hocc/reductions.mli +++ /dev/null @@ -1,20 +0,0 @@ -(** Collection of all reductions, with automatic assignment of unique indexes. *) -open! Basis -open! Basis.Rudiments - -type t - -val empty: t -(** [empty] returns an empty set of reductions. *) - -val insert: lhs:QualifiedType.t -> rhs:Reduction.Params.t -> code:Parse.code option -> t - -> Reduction.t * t -(** [insert ~lhs ~rhs ~code t] creates a [Reduction.t] with unique index and returns both the - reduction and a new [t] with the reduction inserted. *) - -val length: t -> uns -(** [length t] returns the number of reductions in [t]. *) - -val fold: init:'accum -> f:('accum -> Reduction.t -> 'accum) -> t -> 'accum -(** [fold ~init ~f t] iteratively applies [f] to the reductions in [t], in increasing index order. -*) diff --git a/bootstrap/bin/hocc/scan.ml b/bootstrap/bin/hocc/scan.ml index 3ec2e38e7..7cce69d3e 100644 --- a/bootstrap/bin/hocc/scan.ml +++ b/bootstrap/bin/hocc/scan.ml @@ -12,6 +12,7 @@ module AbstractToken = struct | Tok_left | Tok_right | Tok_prec + | Tok_colon_colon_eq let pp t formatter = formatter |> Fmt.fmt (match t with @@ -24,11 +25,12 @@ module AbstractToken = struct | Tok_left -> "Tok_left" | Tok_right -> "Tok_right" | Tok_prec -> "Tok_prec" + | Tok_colon_colon_eq -> "::=" ) let malformations = function | Tok_hocc | Tok_token | Tok_nonterm | Tok_start | Tok_epsilon - | Tok_neutral | Tok_left | Tok_right | Tok_prec + | Tok_neutral | Tok_left | Tok_right | Tok_prec | Tok_colon_colon_eq -> [] end @@ -79,10 +81,10 @@ let pp {scan; _} formatter = Hmc.Scan.pp scan formatter let rec susp_next scan = lazy begin + let open AbstractToken in let scan', ctok = Hmc.Scan.next scan in let ctok' = match Hmc.Scan.ConcreteToken.atok ctok with | Tok_uident (Constant uident) -> begin - let open AbstractToken in let source = Hmc.Scan.ConcreteToken.source ctok in match uident with | "hocc" -> Token.HoccToken {atok=Tok_hocc; source} @@ -96,6 +98,10 @@ let rec susp_next scan = lazy begin | "prec" -> Token.HoccToken {atok=Tok_prec; source} | _ -> Token.HmcToken ctok end + | Tok_colon_op "::=" -> begin + let source = Hmc.Scan.ConcreteToken.source ctok in + Token.HoccToken {atok=Tok_colon_colon_eq; source} + end | _ -> Token.HmcToken ctok in let t' = {scan=scan'; next=susp_next scan'} in diff --git a/bootstrap/bin/hocc/scan.mli b/bootstrap/bin/hocc/scan.mli index 4be984c9a..279cebd27 100644 --- a/bootstrap/bin/hocc/scan.mli +++ b/bootstrap/bin/hocc/scan.mli @@ -14,6 +14,7 @@ module AbstractToken: sig | Tok_left | Tok_right | Tok_prec + | Tok_colon_colon_eq val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) diff --git a/bootstrap/bin/hocc/spec.ml b/bootstrap/bin/hocc/spec.ml index d5b6a205e..dc4402cb5 100644 --- a/bootstrap/bin/hocc/spec.ml +++ b/bootstrap/bin/hocc/spec.ml @@ -6,7 +6,7 @@ type t = { precs: Precs.t; symbols: Symbols.t; prods: Prods.t; - reductions: Reductions.t; + callbacks: Callbacks.t; states: State.t array; } @@ -18,6 +18,9 @@ let string_of_alias_token token = | Scan.Token.HmcToken {atok=Tok_istring (Constant istring); _} -> istring | _ -> not_reached () +let synthetic_name_of_start_name start_name = + start_name ^ "'" + let precs_init io hmh = let rec fold_precs_tl io precs rels doms precs_tl = begin match precs_tl with @@ -101,7 +104,7 @@ let precs_init io hmh = end in let fold_stmt io precs stmt = begin match stmt with - | Parse.StmtPrec {prec=parse_prec} -> fold_prec io precs parse_prec + | Parse.StmtPrec {prec_=parse_prec} -> fold_prec io precs parse_prec | _ -> io, precs end in let rec fold_stmts_tl io precs stmts_tl = begin @@ -119,7 +122,7 @@ let precs_init io hmh = fold_stmts_tl io precs stmts_tl end end in - let io, precs = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + let io, precs = match hmh with Parse.Hmh {hocc_=Hocc {stmts; _}; _} -> fold_stmts io Precs.empty stmts in io, precs @@ -134,7 +137,7 @@ let tokens_init io precs hmh = type_module=Cident {cident}; type_type=Uident {uident}; _}} -> begin let module_ = string_of_token cident in let type_ = string_of_token uident in - QualifiedType.init ~module_ ~type_ + QualifiedType.explicit ~module_ ~type_ end | OfType0Epsilon -> QualifiedType.implicit in @@ -192,7 +195,7 @@ let tokens_init io precs hmh = end in let fold_stmt io precs symbols stmt = begin match stmt with - | Parse.StmtToken {token} -> fold_token io precs symbols token + | Parse.StmtToken {token_} -> fold_token io precs symbols token_ | _ -> io, symbols end in let rec fold_stmts_tl io precs symbols stmts_tl = begin @@ -210,7 +213,7 @@ let tokens_init io precs hmh = fold_stmts_tl io precs symbols stmts_tl end end in - let io, symbols = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + let io, symbols = match hmh with Parse.Hmh {hocc_=Hocc {stmts; _}; _} -> fold_stmts io precs Symbols.empty stmts in io, symbols @@ -238,7 +241,7 @@ let symbol_infos_init io symbols hmh = let name = string_of_token nonterm_cident in let module_ = string_of_token cident in let type_ = string_of_token uident in - name, QualifiedType.init ~module_ ~type_ + name, QualifiedType.explicit ~module_ ~type_ end in match nonterm with @@ -249,8 +252,8 @@ let symbol_infos_init io symbols hmh = | NontermTypeNonterm _ -> io, symbols | NontermTypeStart _ -> begin (* Synthesize start symbol wrapper. *) - let name' = name ^ "'" in - let qtype' = QualifiedType.Synthetic in + let name' = synthetic_name_of_start_name name in + let qtype' = QualifiedType.synthetic_wrapper qtype in let symbols = insert_symbol_info name' qtype' cident symbols in io, symbols end @@ -260,7 +263,7 @@ let symbol_infos_init io symbols hmh = end in let fold_stmt io symbols stmt = begin match stmt with - | Parse.StmtNonterm {nonterm} -> fold_nonterm io symbols nonterm + | Parse.StmtNonterm {nonterm_} -> fold_nonterm io symbols nonterm_ | _ -> io, symbols end in let rec fold_stmts_tl io symbols stmts_tl = begin @@ -278,7 +281,7 @@ let symbol_infos_init io symbols hmh = fold_stmts_tl io symbols stmts_tl end end in - let io, symbols = match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> + let io, symbols = match hmh with Parse.Hmh {hocc_=Hocc {stmts; _}; _} -> fold_stmts io symbols stmts in io, symbols @@ -288,13 +291,6 @@ let symbols_init io precs symbols hmh = match prod_param with | Parse.ProdParamBinding {prod_param_symbol; _} | Parse.ProdParam {prod_param_symbol} -> begin - let binding = match prod_param with - | Parse.ProdParamBinding {ident=IdentUident {uident=Uident {uident=ident}}; _} - | Parse.ProdParamBinding {ident=IdentCident {cident=Cident {cident=ident}}; _} -> - Some (string_of_token ident) - | Parse.ProdParamBinding {ident=IdentUscore _; _} - | Parse.ProdParam _ -> None - in let io, symbol_name, qtype = match prod_param_symbol with | ProdParamSymbolCident {cident=Cident {cident}} -> begin let symbol_name = string_of_token cident in @@ -338,8 +334,44 @@ let symbols_init io precs symbols hmh = | Some Symbols.{name; qtype; _} -> io, name, qtype end in + let io, binding = match prod_param with + | Parse.ProdParamBinding + {ident=IdentUident {uident=Uident {uident=ident}}; colon; prod_param_symbol} + | Parse.ProdParamBinding + {ident=IdentCident {cident=Cident {cident=ident}}; colon; prod_param_symbol} -> begin + let Symbols.{name; qtype; _} = match prod_param_symbol with + | ProdParamSymbolCident {cident=Cident {cident}} -> begin + Symbols.info_of_name_hlt (Hmc.Source.Slice.to_string (Scan.Token.source cident)) + symbols + end + | ProdParamSymbolAlias {alias} -> + Symbols.info_of_alias_hlt (string_of_alias_token alias) symbols + in + let io = match qtype with + | {explicit_opt=None; _} -> begin + let base = Scan.Token.source ident |> Hmc.Source.Slice.base in + let past = Scan.Token.source colon |> Hmc.Source.Slice.past in + let source = Hmc.Source.Slice.of_cursors ~base ~past in + let io = + io.err + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp source + |> Fmt.fmt ": Cannot bind to empty symbol variant: " + |> Fmt.fmt (Hmc.Source.Slice.to_string (Scan.Token.source ident)) + |> Fmt.fmt ":" |> Fmt.fmt name |> Fmt.fmt "\n" + |> Io.with_err io + in + Io.fatal io + end + | {explicit_opt=Some _; _} -> io + in + io, Some (string_of_token ident) + end + | Parse.ProdParamBinding {ident=IdentUscore _; _} + | Parse.ProdParam _ -> io, None + in let param = - Reduction.Param.init ~binding ~symbol_name ~qtype ~prod_param:(Some prod_param) in + Callback.Param.init ~binding ~symbol_name ~qtype ~prod_param:(Some prod_param) in io, param :: prod_params end end in @@ -350,7 +382,7 @@ let symbols_init io precs symbols hmh = let io, prod_params = fold_prod_param io symbols prod_params prod_param in fold_prod_params_tl io symbols prod_params prod_params_tl end - | ProdParamsTlEpsilon -> Reduction.Params.init io (Array.of_list_rev prod_params) + | ProdParamsTlEpsilon -> Callback.Params.init io (Array.of_list_rev prod_params) end in let fold_prod_pattern io symbols prod_pattern = begin match prod_pattern with @@ -359,9 +391,9 @@ let symbols_init io precs symbols hmh = let io, prod_params = fold_prod_param io symbols [] prod_param in fold_prod_params_tl io symbols prod_params prod_params_tl end - | ProdPatternEpsilon _ -> Reduction.Params.init io [||] + | ProdPatternEpsilon _ -> Callback.Params.init io [||] end in - let fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code ~reduction + let fold_prod io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback nonterm_prods_set prod = begin match prod with | Parse.Prod {prod_pattern; prec_ref} -> begin @@ -371,7 +403,7 @@ let symbols_init io precs symbols hmh = | Some _ -> io | None -> begin (* Codeless productions have no use for parameter bindings. *) - Reduction.Params.fold ~init:io ~f:(fun io Reduction.Param.{binding; prod_param; _} -> + Callback.Params.fold ~init:io ~f:(fun io Callback.Param.{binding; prod_param; _} -> match binding with | Some binding -> begin let binding_token = match prod_param with @@ -390,7 +422,7 @@ let symbols_init io precs symbols hmh = ) rhs end in - let rhs_indexes = Reduction.Params.map ~f:(fun Reduction.Param.{symbol_name; _} -> + let rhs_indexes = Callback.Params.map ~f:(fun Callback.Param.{symbol_name; _} -> match Symbols.info_of_name_hlt symbol_name symbols with Symbols.{index; _} -> index ) rhs in let prec = match prec_ref with @@ -422,68 +454,64 @@ let symbols_init io precs symbols hmh = end | PrecRefEpsilon -> nonterm_prec in - let lhs = nonterm_info.qtype in - let reduction, reductions = match reduction with - | Some reduction -> reduction, reductions - | None -> begin - let reduction, reductions = Reductions.insert ~lhs ~rhs ~code reductions in - reduction, reductions - end + let lhs = nonterm_info in + let callback, callbacks = match callback with + | Some callback -> callback, callbacks + | None -> Callbacks.insert ~lhs ~rhs ~code callbacks in let prod, prods = - Prods.insert ~lhs_index ~rhs_indexes ~prec ~stmt:(Some prod) ~reduction prods in + Prods.insert ~lhs_index ~rhs_indexes ~prec ~stmt:(Some prod) ~callback prods in let nonterm_prods_set = Ordset.insert prod nonterm_prods_set in - io, nonterm_prods_set, prods, reductions, prod + io, nonterm_prods_set, prods, callbacks, prod end end in - let rec fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec - ~code ~reduction nonterm_prods_set prods_tl = begin + let rec fold_prods_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback + nonterm_prods_set prods_tl = begin match prods_tl with | Parse.ProdsTlBarProd {prod; prods_tl; _} -> begin - let io, nonterm_prods_set, prods, reductions, _prod = - fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec - ~code ~reduction nonterm_prods_set prod in - fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec - ~code ~reduction nonterm_prods_set prods_tl + let io, nonterm_prods_set, prods, callbacks, _prod = + fold_prod io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback + nonterm_prods_set prod in + fold_prods_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback + nonterm_prods_set prods_tl end - | ProdsTlEpsilon -> io, nonterm_prods_set, prods, reductions + | ProdsTlEpsilon -> io, nonterm_prods_set, prods, callbacks end in - let fold_prods io precs symbols prods reductions ~nonterm_info ~nonterm_prec - parse_prods = begin + let fold_prods io precs symbols prods callbacks ~nonterm_info ~nonterm_prec parse_prods = begin match parse_prods with | Parse.ProdsBarProd {prod; prods_tl; _} | ProdsProd {prod; prods_tl} -> begin let code = None in - let reduction = None in + let callback = None in let nonterm_prods_set = Ordset.empty (module Prod) in - let io, nonterm_prods_set, prods, reductions, _prod = - fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code - ~reduction nonterm_prods_set prod in - fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec ~code - ~reduction nonterm_prods_set prods_tl + let io, nonterm_prods_set, prods, callbacks, _prod = + fold_prod io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback + nonterm_prods_set prod in + fold_prods_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec ~code ~callback + nonterm_prods_set prods_tl end end in - let fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec - nonterm_prods_set reduction = begin + let fold_reduction io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set + reduction = begin match reduction with | Parse.Reduction {prods=parse_prods; code; _} -> begin - (* Map one or more prods to a single reduction. *) + (* Map one or more prods to a single reduction callback. *) match parse_prods with | ProdsBarProd {prod=parse_prod; prods_tl; _} | ProdsProd {prod=parse_prod; prods_tl} -> begin let reduction_prods = Ordset.empty (module Prod) in - let io, reduction_prods_merge, prods, reductions, prod = - fold_prod io precs symbols prods reductions ~nonterm_info ~nonterm_prec - ~code:(Some code) ~reduction:None reduction_prods parse_prod in + let io, reduction_prods_merge, prods, callbacks, prod = + fold_prod io precs symbols prods callbacks ~nonterm_info ~nonterm_prec + ~code:(Some code) ~callback:None reduction_prods parse_prod in let reduction_prods = Ordset.union reduction_prods_merge reduction_prods in - let io, reduction_prods_merge, prods, reductions = - fold_prods_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec - ~code:(Some code) ~reduction:(Some prod.reduction) reduction_prods prods_tl in + let io, reduction_prods_merge, prods, callbacks = + fold_prods_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec + ~code:(Some code) ~callback:(Some prod.callback) reduction_prods prods_tl in let reduction_prods = Ordset.union reduction_prods_merge reduction_prods in (* Verify that the prods' parameters are uniform. *) let () = Ordset.iter ~f:(fun prod1 -> let open Cmp in - match Reduction.Params.cmp Prod.(prod.reduction.rhs) Prod.(prod1.reduction.rhs) with + match Callback.Params.cmp Prod.(prod.callback.rhs) Prod.(prod1.callback.rhs) with | Lt | Gt -> begin let pattern_source = Option.value_hlt ( @@ -502,35 +530,35 @@ let symbols_init io precs symbols hmh = | Eq -> () ) reduction_prods in let nonterm_prods_set = Ordset.union reduction_prods nonterm_prods_set in - io, nonterm_prods_set, prods, reductions + io, nonterm_prods_set, prods, callbacks end end end in - let rec fold_reductions_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + let rec fold_reductions_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set reductions_tl = begin match reductions_tl with | Parse.ReductionsTlBarReduction {reduction; reductions_tl; _} -> begin - let io, nonterm_prods_set, prods, reductions = - fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec + let io, nonterm_prods_set, prods, callbacks = + fold_reduction io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set reduction in - fold_reductions_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + fold_reductions_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set reductions_tl end - | ReductionsTlEpsilon -> io, nonterm_prods_set, prods, reductions + | ReductionsTlEpsilon -> io, nonterm_prods_set, prods, callbacks end in - let fold_reductions io precs symbols prods reductions ~nonterm_info ~nonterm_prec + let fold_reductions io precs symbols prods callbacks ~nonterm_info ~nonterm_prec parse_reductions = begin match parse_reductions with | Parse.ReductionsReduction {reduction; reductions_tl} -> begin let nonterm_prods_set = Ordset.empty (module Prod) in - let io, nonterm_prods_set, prods, reductions = - fold_reduction io precs symbols prods reductions ~nonterm_info ~nonterm_prec + let io, nonterm_prods_set, prods, callbacks = + fold_reduction io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set reduction in - fold_reductions_tl io precs symbols prods reductions ~nonterm_info ~nonterm_prec + fold_reductions_tl io precs symbols prods callbacks ~nonterm_info ~nonterm_prec nonterm_prods_set reductions_tl end end in - let fold_nonterm io precs symbols prods reductions nonterm = begin + let fold_nonterm io precs symbols prods callbacks nonterm = begin let start, name, prec = match nonterm with | Parse.NontermProds {nonterm_type; cident=Cident {cident}; prec_ref; _} | NontermReductions {nonterm_type; cident=Cident {cident}; prec_ref; _} -> begin @@ -561,57 +589,61 @@ let symbols_init io precs symbols hmh = in let (Symbols.{index; qtype; _} as nonterm_info) = Symbols.info_of_name_hlt name symbols in let nonterm_prec = prec in - let io, nonterm_prods, prods, reductions = match nonterm with + let io, nonterm_prods, prods, callbacks = match nonterm with | NontermProds {prods=parse_prods; _} -> - fold_prods io precs symbols prods reductions ~nonterm_info ~nonterm_prec parse_prods - | NontermReductions {reductions=parse_reductions; _} -> - fold_reductions io precs symbols prods reductions ~nonterm_info ~nonterm_prec - parse_reductions + fold_prods io precs symbols prods callbacks ~nonterm_info ~nonterm_prec parse_prods + | NontermReductions {reductions; _} -> + fold_reductions io precs symbols prods callbacks ~nonterm_info ~nonterm_prec reductions in let symbols = Symbols.insert_nonterm ~name ~prec ~stmt:(Some nonterm) ~start ~prods:nonterm_prods symbols in - let io, symbols, prods, reductions = match start with - | false -> io, symbols, prods, reductions + let io, symbols, prods, callbacks = match start with + | false -> io, symbols, prods, callbacks | true -> begin (* Synthesize wrapper for start symbol. *) let name' = name ^ "'" in let Symbols.{index=index'; _} = Symbols.info_of_name_hlt name' symbols in + let lhs = Symbols.{ + index=index'; + name=name'; + alias=None; + qtype=QualifiedType.synthetic_wrapper qtype; + } in let Symbol.{index=pe_index; name=pe_name; qtype=pe_qtype; _} = Symbol.pseudo_end in - let io, rhs = Reduction.Params.init io [| - Reduction.Param.init ~binding:(Some "start") ~symbol_name:name ~qtype ~prod_param:None; - Reduction.Param.init ~binding:None ~symbol_name:pe_name ~qtype:pe_qtype + let io, rhs = Callback.Params.init io [| + Callback.Param.init ~binding:(Some "start") ~symbol_name:name ~qtype ~prod_param:None; + Callback.Param.init ~binding:None ~symbol_name:pe_name ~qtype:pe_qtype ~prod_param:None; |] in - let reduction, reductions = - Reductions.insert ~lhs:QualifiedType.synthetic ~rhs ~code:None reductions in + let callback, callbacks = Callbacks.insert ~lhs ~rhs ~code:None callbacks in let prod, prods = Prods.insert ~lhs_index:index' ~rhs_indexes:[|index; pe_index|] - ~prec:None ~stmt:None ~reduction prods in + ~prec:None ~stmt:None ~callback prods in let nonterm_prods = Ordset.singleton (module Prod) prod in let symbols = Symbols.insert_nonterm ~name:name' ~prec:None ~stmt:None ~start ~prods:nonterm_prods symbols in - io, symbols, prods, reductions + io, symbols, prods, callbacks end in - io, symbols, prods, reductions + io, symbols, prods, callbacks end in - let fold_stmt io precs symbols prods reductions stmt = begin + let fold_stmt io precs symbols prods callbacks stmt = begin match stmt with - | Parse.StmtNonterm {nonterm} -> fold_nonterm io precs symbols prods reductions nonterm - | _ -> io, symbols, prods, reductions + | Parse.StmtNonterm {nonterm_} -> fold_nonterm io precs symbols prods callbacks nonterm_ + | _ -> io, symbols, prods, callbacks end in - let rec fold_stmts_tl io precs symbols prods reductions stmts_tl = begin + let rec fold_stmts_tl io precs symbols prods callbacks stmts_tl = begin match stmts_tl with | Parse.StmtsTl {stmt; stmts_tl; _} -> begin - let io, symbols, prods, reductions = fold_stmt io precs symbols prods reductions stmt in - fold_stmts_tl io precs symbols prods reductions stmts_tl + let io, symbols, prods, callbacks = fold_stmt io precs symbols prods callbacks stmt in + fold_stmts_tl io precs symbols prods callbacks stmts_tl end - | StmtsTlEpsilon -> io, symbols, prods, reductions + | StmtsTlEpsilon -> io, symbols, prods, callbacks end in - let fold_stmts io precs symbols prods reductions stmts = begin + let fold_stmts io precs symbols prods callbacks stmts = begin match stmts with | Parse.Stmts {stmt; stmts_tl} -> begin - let io, symbols, prods, reductions = fold_stmt io precs symbols prods reductions stmt in - fold_stmts_tl io precs symbols prods reductions stmts_tl + let io, symbols, prods, callbacks = fold_stmt io precs symbols prods callbacks stmt in + fold_stmts_tl io precs symbols prods callbacks stmts_tl end end in (* Compute first/follow sets for all symbols. *) @@ -704,16 +736,16 @@ let symbols_init io precs symbols hmh = (* Extract the non-terminal specifications from the AST. The end result will be: * * - `symbols`: Opaquely managed symbols collection - * - `prods`/`reductions` arrays: Each element encodes its own array offset + * - `prods`/`callbacks` arrays: Each element encodes its own array offset * * Tokens have already been fully extracted into `symbols`, and basic info for non-terminals has - * already been extracted into `symbols`; prod/reduction indexes are incrementally assigned during + * already been extracted into `symbols`; prod/callback indexes are incrementally assigned during * AST traversal. *) - let reductions = Reductions.empty in + let callbacks = Callbacks.empty in let prods = Prods.empty in - let io, symbols, prods, reductions = - match hmh with Parse.Hmh {hocc=Hocc {stmts; _}; _} -> - fold_stmts io precs symbols prods reductions stmts + let io, symbols, prods, callbacks = + match hmh with Parse.Hmh {hocc_=Hocc {stmts; _}; _} -> + fold_stmts io precs symbols prods callbacks stmts in (* Close on symbols' first/follow sets. *) let symbols = close_symbols symbols in @@ -741,7 +773,7 @@ let symbols_init io precs symbols hmh = |> Fmt.fmt "\n" |> Io.with_log io in - io, symbols, prods, reductions + io, symbols, prods, callbacks let compat_init algorithm ~resolve io symbols prods = let io, compat_string, compat = match algorithm with @@ -757,7 +789,7 @@ let compat_init algorithm ~resolve io symbols prods = in io, compat -let rec isocores_init algorithm ~resolve io precs symbols prods reductions = +let rec isocores_init algorithm ~resolve io precs symbols prods callbacks = (* Collect the LR(1) item set closures that comprise the initial work queue. There is one such * closure for each synthetic start symbol. *) let init symbols ~compat = begin @@ -857,7 +889,7 @@ let rec isocores_init algorithm ~resolve io precs symbols prods reductions = |> Io.with_log io in let io, lalr1_isocores, lalr1_states = - init_inner Conf.Lalr1 ~resolve:false io precs symbols prods reductions in + init_inner Conf.Lalr1 ~resolve:false io precs symbols prods callbacks in Ielr1.gen_gotonub_of_statenub_goto ~resolve io symbols prods lalr1_isocores lalr1_states end | _ -> begin @@ -1145,8 +1177,8 @@ and hmh_extract io hmh = let io, precs = precs_init io hmh in let io, symbols = tokens_init io precs hmh in let io, symbols = symbol_infos_init io symbols hmh in - let io, symbols, prods, reductions = symbols_init io precs symbols hmh in - io, precs, symbols, prods, reductions + let io, symbols, prods, callbacks = symbols_init io precs symbols hmh in + io, precs, symbols, prods, callbacks and gc_states io isocores states = let state_indexes_reachable states = begin @@ -1363,9 +1395,9 @@ and remerge_states io symbols isocores states = io, reindexed_isocores, reindexed_states end -and init_inner algorithm ~resolve io precs symbols prods reductions = +and init_inner algorithm ~resolve io precs symbols prods callbacks = let io, isocores, gotonub_of_statenub_goto = - isocores_init algorithm ~resolve io precs symbols prods reductions in + isocores_init algorithm ~resolve io precs symbols prods callbacks in let io, states = states_init io ~resolve symbols prods isocores ~gotonub_of_statenub_goto in io, isocores, states @@ -1382,585 +1414,15 @@ and init algorithm ~resolve io hmh = |> Fmt.fmt " specification\n" |> Io.with_log io in - let io, precs, symbols, prods, reductions = hmh_extract io hmh in - let io, isocores, states = init_inner algorithm ~resolve io precs symbols prods reductions in + let io, precs, symbols, prods, callbacks = hmh_extract io hmh in + let io, isocores, states = init_inner algorithm ~resolve io precs symbols prods callbacks in let io, isocores, states = gc_states io isocores states in let io, _isocores, states = remerge_states io symbols isocores states in let io = log_unused io precs symbols prods states in - io, {algorithm; precs; symbols; prods; reductions; states} + io, {algorithm; precs; symbols; prods; callbacks; states} let conflicts {states; _} = match Array.reduce ~f:Uns.(+) (Array.map ~f:(fun state -> State.conflicts ~filter_pseudo_end:false state) states) with | None -> 0L | Some conflicts -> conflicts - -type description = - | DescriptionTxt - | DescriptionHtml - -let to_description conf io description t = - let sink _ formatter = formatter in - let passthrough s formatter = formatter |> Fmt.fmt s in - let txt = match description with - | DescriptionTxt -> passthrough - | DescriptionHtml -> sink - in - let html = match description with - | DescriptionTxt -> sink - | DescriptionHtml -> passthrough - in - let pp_symbol_index symbol_index formatter = begin - let symbol = Symbols.symbol_of_symbol_index symbol_index t.symbols in - let pretty_name = match symbol.alias with - | None -> symbol.name - | Some alias -> - String.Fmt.empty - |> txt "\"" |> html "“" - |> Fmt.fmt alias - |> txt "\"" |> html "”" - |> Fmt.to_string - in - formatter |> html " html symbol.name |> html "\">" - |> Fmt.fmt pretty_name |> html "" - end in - let pp_symbol_set symbol_set formatter = begin - formatter - |> Fmt.fmt "{" - |> (fun formatter -> - Ordset.foldi ~init:formatter ~f:(fun i formatter symbol_index -> - formatter - |> (fun formatter -> match i with 0L -> formatter | _ -> formatter |> Fmt.fmt ", ") - |> pp_symbol_index symbol_index - ) symbol_set - ) - |> Fmt.fmt "}" - end in - let pp_prec prec_ind formatter = begin - let ref_name = (Precs.prec_of_prec_index prec_ind t.precs).name in - formatter - |> Fmt.fmt "prec " |> html " html ref_name |> html "\">" - |> Fmt.fmt ref_name - |> html "" - end in - let pp_prod ?(do_pp_prec=true) Prod.{lhs_index; rhs_indexes; prec; _} formatter = begin - let lhs_name = Symbol.name (Symbols.symbol_of_symbol_index lhs_index t.symbols) in - formatter - |> html " html lhs_name |> html "\">" - |> Fmt.fmt lhs_name - |> html "" |> Fmt.fmt " ::=" - |> (fun formatter -> - match Array.length rhs_indexes with - | 0L -> formatter |> Fmt.fmt " epsilon" - | _ -> begin - Array.fold ~init:formatter ~f:(fun formatter rhs_index -> - let rhs_name = Symbol.name (Symbols.symbol_of_symbol_index rhs_index t.symbols) in - formatter - |> Fmt.fmt " " - |> html " html rhs_name |> html "\">" - |> pp_symbol_index rhs_index - |> html "" - ) rhs_indexes - end - ) - |> (fun formatter -> - match do_pp_prec, prec with - | false, _ - | _, None -> formatter - | true, Some {index=prec_ind; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_ind - ) - end in - let pp_lr0item lr0item formatter = begin - let Lr0Item.{prod; dot} = lr0item in - let Prod.{lhs_index; rhs_indexes; _} = prod in - formatter - |> Fmt.fmt (Symbol.name (Symbols.symbol_of_symbol_index lhs_index t.symbols)) - |> Fmt.fmt " ::=" - |> (fun formatter -> - Array.foldi ~init:formatter ~f:(fun i formatter rhs_index -> - formatter - |> Fmt.fmt (match i = dot with - | false -> "" - | true -> " ·" - ) - |> Fmt.fmt " " - |> pp_symbol_index rhs_index - ) rhs_indexes - |> Fmt.fmt ( - match Array.length rhs_indexes = dot with - | false -> "" - | true -> " ·" - ) - ) - end in - let pp_lr1item ?(do_pp_prec=true) lr1item formatter = begin - let Lr1Item.{lr0item; _} = lr1item in - let Lr0Item.{prod; _} = lr0item in - let Prod.{prec; _} = prod in - formatter - |> Fmt.fmt "[" - |> pp_lr0item lr0item - |> Fmt.fmt ", {" - |> (fun formatter -> - Array.foldi ~init:formatter ~f:(fun i formatter symbol_index -> - formatter - |> Fmt.fmt (match i with - | 0L -> "" - | _ -> ", " - ) - |> pp_symbol_index symbol_index - ) (Ordset.to_array Lr1Item.(lr1item.follow)) - ) - |> Fmt.fmt "}]" - |> (fun formatter -> - match do_pp_prec, prec with - | false, _ - | _, None -> formatter - | true, Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index - ) - end in - let pp_state_index state_index formatter = begin - let state_index_string = String.Fmt.empty |> State.Index.pp state_index |> Fmt.to_string in - formatter - |> html " html state_index_string |> html "\">" - |> Fmt.fmt state_index_string - |> html "" - end in - let pp_action symbol_index action formatter = begin - let pp_symbol_prec symbol_index formatter = begin - let symbol = Symbols.symbol_of_symbol_index symbol_index t.symbols in - match symbol.prec with - | None -> formatter - | Some Prec.{index; _} -> formatter |> Fmt.fmt " " |> pp_prec index - end in - let pp_reduce_prec Prod.{lhs_index; prec; _} formatter = begin - match prec with - | Some _ -> formatter - | None -> formatter |> pp_symbol_prec lhs_index - end in - let open State.Action in - match action with - | ShiftPrefix state_index -> - formatter - |> Fmt.fmt "ShiftPrefix " |> pp_state_index state_index - |> pp_symbol_prec symbol_index - | ShiftAccept state_index -> - formatter - |> Fmt.fmt "ShiftAccept " |> pp_state_index state_index - |> pp_symbol_prec symbol_index - | Reduce prod_index -> begin - let prod = Prods.prod_of_prod_index prod_index t.prods in - formatter |> Fmt.fmt "Reduce " |> pp_prod prod - |> pp_reduce_prec prod - end - end in - let pp_contrib contrib formatter = begin - assert ((Contrib.length contrib) = 1L); - assert (not (Contrib.mem_shift contrib)); - let prod_index = Contrib.reduces contrib |> Ordset.choose_hlt in - let prod = Prods.prod_of_prod_index prod_index t.prods in - formatter - |> Fmt.fmt "Reduce " - |> pp_prod ~do_pp_prec:false prod - end in - let io = - io.log - |> Fmt.fmt "hocc: Generating " - |> txt "text" |> html "html" - |> Fmt.fmt " report\n" - |> Io.with_log io - in - let nprecs = Precs.length t.precs in - let states_algorithm = match Conf.algorithm conf with - | Lr1 -> "LR(1)" - | Ielr1 -> "IELR(1)" - | Pgm1 -> "PGM(1)" - | Lalr1 -> "LALR(1)" - in - (match description with - | DescriptionTxt -> io.txt - | DescriptionHtml -> io.html - ) - |> html "\n" - |> html "\n" - |> html "

" |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) - |> Fmt.fmt " grammar" |> html "

" |> Fmt.fmt "\n" - |> Fmt.fmt "\n" - |> html "

Sections

\n" - |> html " \n" - |> html "
\n" - |> (fun formatter -> match nprecs with - | 0L -> formatter - | _ -> - formatter |> html "

" |> Fmt.fmt "Precedences" - |> (fun formatter -> match (Conf.resolve conf) with - | true -> formatter - | false -> formatter |> Fmt.fmt " (conflict resolution disabled)" - ) - |> html "

" - |> Fmt.fmt"\n" - ) - |> html "
    \n" - |> (fun formatter -> - Precs.fold ~init:formatter ~f:(fun formatter Prec.{name; assoc; doms; _} -> - formatter - |> Fmt.fmt " " |> html "
  • " - |> Fmt.fmt (match assoc with - | None -> "neutral" - | Some Left -> "left" - | Some Right -> "right" - ) - |> Fmt.fmt " " |> html " html name |> html "\">" - |> Fmt.fmt name - |> html "" - |> (fun formatter -> - match Ordset.is_empty doms with - | true -> formatter - | false -> begin - let _, formatter = Ordset.fold ~init:(true, formatter) - ~f:(fun (first, formatter) prec_ind -> - let ref_name = (Precs.prec_of_prec_index prec_ind t.precs).name in - let formatter = - formatter - |> Fmt.fmt (match first with - | true -> " < " - | false -> ", " - ) - |> html " html ref_name |> html "\">" - |> Fmt.fmt ref_name - |> html "" - in - (false, formatter) - ) doms - in - formatter - end - ) - |> html "
  • " |> Fmt.fmt "\n" - ) t.precs - ) - |> html "
\n" - |> html "

" |> Fmt.fmt "Tokens" |> html "

" |> Fmt.fmt "\n" - |> html "
    \n" - |> (fun formatter -> - Symbols.symbols_fold ~init:formatter - ~f:(fun formatter (Symbol.{name; alias; qtype; prec; first; follow; _} as symbol) -> - match Symbol.is_token symbol with - | false -> formatter - | true -> begin - formatter - |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "token " - |> html " html name |> html "\">" - |> Fmt.fmt name - |> html "" - |> (fun formatter -> - match alias with - | None -> formatter - | Some alias -> formatter |> Fmt.fmt " " |> String.pp alias - ) - |> (fun formatter -> - match qtype with - | Synthetic - | Implicit -> formatter - | Explicit {module_; type_} -> - formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ - ) - |> (fun formatter -> - match prec with - | None -> formatter - | Some {index=prec_index; _} -> formatter |> Fmt.fmt " " |> pp_prec prec_index - ) - |> Fmt.fmt "\n" - |> html "
      \n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " - |> pp_symbol_set first - |> html "
    • " |> Fmt.fmt "\n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " - |> pp_symbol_set follow - |> html "
    • " |> Fmt.fmt "\n" - |> html "
    \n" - |> html "
  • \n" - end - ) t.symbols - ) - |> html "
\n" - |> html "

" |> Fmt.fmt "Non-terminals" |> html "

" |> Fmt.fmt "\n" - |> html "
    \n" - |> (fun formatter -> - Symbols.symbols_fold ~init:formatter - ~f:(fun formatter (Symbol.{name; start; qtype; prods; first; follow; _} as symbol) -> - match Symbol.is_nonterm symbol with - | false -> formatter - | true -> begin - formatter - |> Fmt.fmt " " |> html "
  • " - |> Fmt.fmt (match start with - | true -> "start " - | false -> "nonterm " - ) - |> html " html name |> html "\">" - |> Fmt.fmt name - |> html "" - |> (fun formatter -> - match qtype with - | Synthetic - | Implicit -> formatter - | Explicit {module_; type_} -> - formatter |> Fmt.fmt " of " |> Fmt.fmt module_ |> Fmt.fmt "." |> Fmt.fmt type_ - ) - |> Fmt.fmt "\n" - |> html "
      \n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "First: " - |> pp_symbol_set first - |> html "
    • " |> Fmt.fmt "\n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Follow: " - |> pp_symbol_set follow - |> html "
    • " |> Fmt.fmt "\n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Productions\n" - |> html "
        \n" - |> (fun formatter -> - Ordset.fold ~init:formatter - ~f:(fun formatter prod -> - formatter - |> Fmt.fmt " " |> html "
      • " - |> pp_prod prod - |> html "
      • " |> Fmt.fmt "\n" - ) prods - |> html "
      \n" - |> html "
    • \n" - ) - |> html "
    \n" - |> html "
  • \n" - end - ) t.symbols - ) - |> html "
\n" - |> html "

" |> Fmt.fmt states_algorithm |> Fmt.fmt " States" |> html "

" - |> Fmt.fmt "\n" - |> html "
    \n" - |> (fun formatter -> - Array.fold ~init:formatter - ~f:(fun formatter (State.{statenub; actions; gotos; _} as state) -> - let state_index_string = - String.Fmt.empty |> StateNub.Index.pp (StateNub.index statenub) - |> Fmt.to_string in - formatter - |> Fmt.fmt " " |> html "
  • " |> Fmt.fmt "State " - |> html " html state_index_string |> html "\">" - |> Fmt.fmt state_index_string - |> (fun formatter -> - match t.algorithm with - | Lr1 - | Ielr1 - | Pgm1 -> begin - formatter - |> Fmt.fmt " [" - |> Uns.pp (StateNub.isocores_sn statenub) - |> Fmt.fmt "." - |> Uns.pp (StateNub.isocore_set_sn statenub) - |> Fmt.fmt "]" - end - | Lalr1 -> formatter - ) - |> html "" |> Fmt.fmt "\n" - |> html "
      \n" - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Kernel\n" - |> html "
        \n" - |> (fun formatter -> - Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> - formatter - |> Fmt.fmt " " |> html "
      • " - |> pp_lr1item lr1itemset - |> html "
      • " |> Fmt.fmt "\n" - ) statenub.lr1itemsetclosure.kernel - ) - |> html "
      \n" - |> html "
    • \n" - |> (fun formatter -> - match Lr1Itemset.is_empty statenub.lr1itemsetclosure.added with - | true -> formatter - | false -> begin - formatter - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Added\n" - |> html "
        \n" - |> (fun formatter -> - Lr1Itemset.fold ~init:formatter ~f:(fun formatter lr1itemset -> - formatter |> Fmt.fmt " " |> html "
      • " - |> pp_lr1item lr1itemset - |> html "
      • " |> Fmt.fmt "\n" - ) statenub.lr1itemsetclosure.added - ) - |> html "
      \n" - |> html "
    • \n" - end - ) - |> (fun formatter -> - let has_pseudo_end_conflict = State.has_pseudo_end_conflict state in - formatter - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Actions\n" - |> html "
        \n" - |> (fun formatter -> - Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, action_set) -> - formatter - |> (fun formatter -> - match has_pseudo_end_conflict && symbol_index = Symbol.pseudo_end.index with - | false -> formatter |> Fmt.fmt " " |> html "
      • " - | true -> formatter |> txt "CONFLICT " |> html "
      • CONFLICT " - ) - |> pp_symbol_index symbol_index |> Fmt.fmt " :" - |> (fun formatter -> - match Ordset.length action_set with - | 1L -> begin - formatter - |> Fmt.fmt " " - |> pp_action symbol_index (Ordset.choose_hlt action_set) - |> html "
      • " |> Fmt.fmt "\n" - end - | _ -> begin - formatter - |> html " CONFLICT" |> Fmt.fmt "\n" - |> html "
          \n" - |> (fun formatter -> - Ordset.fold ~init:formatter ~f:(fun formatter action -> - formatter - |> txt "CONFLICT " |> html "
        • " - |> pp_action symbol_index action - |> html "
        • " |> Fmt.fmt "\n" - ) action_set - ) - |> html "
        \n" - end - ) - ) actions - ) - |> html "
      \n" - |> html "
    • \n" - ) - |> (fun formatter -> - match Ordmap.is_empty gotos with - | true -> formatter - | false -> begin - formatter - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Gotos\n" - |> html "
        \n" - |> (fun formatter -> - Ordmap.fold ~init:formatter ~f:(fun formatter (symbol_index, state_index) -> - formatter - |> Fmt.fmt " " |> html "
      • " - |> pp_symbol_index symbol_index |> Fmt.fmt " : " |> State.Index.pp state_index - |> html "
      • " |> Fmt.fmt "\n" - ) gotos - ) - |> html "
      \n" - |> html "
    • \n" - end - ) - |> (fun formatter -> - let kernel_attribs = StateNub.filtered_kernel_attribs statenub in - match KernelAttribs.length kernel_attribs with - | 0L -> formatter - | _ -> begin - let kernel_attribs = StateNub.filtered_kernel_attribs statenub in - formatter - |> Fmt.fmt " " |> html "
    • " |> Fmt.fmt "Conflict contributions\n" - |> html "
        \n" - |> (fun formatter -> - KernelAttribs.fold ~init:formatter ~f:(fun formatter (kernel_item, attribs) -> - formatter - |> Fmt.fmt " " |> pp_lr1item ~do_pp_prec:false kernel_item - |> Fmt.fmt "\n" - |> html "
          \n" - |> (fun formatter -> - Attribs.fold ~init:formatter - ~f:(fun formatter Attrib.{conflict_state_index; contrib; _} -> - formatter - |> Fmt.fmt " " |> html "
        • " - |> pp_state_index conflict_state_index - |> Fmt.fmt " : " - |> pp_contrib contrib - |> html "
        • " |> Fmt.fmt "\n" - ) attribs - ) - |> html "
        \n" - ) kernel_attribs - ) - |> html "
      \n" - |> html "
    • \n" - end - ) - |> html "
    \n" - |> html "
  • \n" - ) t.states - ) - |> html "
\n" - |> html "\n" - |> html "\n" - |> (match description with - | DescriptionTxt -> Io.with_txt io - | DescriptionHtml -> Io.with_html io - ) - -let to_txt conf io t = - to_description conf io DescriptionTxt t - -let to_html conf io t = - to_description conf io DescriptionHtml t - -let to_hocc io t = - let io = io.log |> Fmt.fmt "hocc: Generating hocc report\n" |> Io.with_log io in - io.hocc - |> Fmt.fmt "hocc\n" - |> (fun formatter -> - Precs.fold ~init:formatter ~f:(fun formatter prec -> - formatter |> Prec.src_fmt prec - ) t.precs - ) - |> (fun formatter -> - Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> - match Symbol.is_token symbol && not (Symbol.is_synthetic symbol) with - | false -> formatter - | true -> formatter |> Symbols.src_fmt symbol t.symbols - ) t.symbols - ) - |> (fun formatter -> - Symbols.symbols_fold ~init:formatter ~f:(fun formatter symbol -> - match Symbol.is_nonterm symbol && not (Symbol.is_synthetic symbol) with - | false -> formatter - | true -> formatter |> Symbols.src_fmt symbol t.symbols - ) t.symbols - ) - |> Io.with_hocc io - -let to_hmi conf _hmhi io _t = - let io = - io.hmi - |> Fmt.fmt "XXX not implemented\n" - |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) - |> Fmt.fmt ".hmi\n" - |> Io.with_hmi io - in - io - -let to_hm conf _hmh io _t = - let io = - io.hm - |> Fmt.fmt "XXX not implemented\n" - |> Fmt.fmt (Path.Segment.to_string_hlt (Conf.module_ conf)) - |> Fmt.fmt ".hm\n" - |> Io.with_hm io - in - io - -let to_mli _conf _hmhi _io _t = - not_implemented "XXX" - -let to_ml _conf _hmh _io _t = - not_implemented "XXX" diff --git a/bootstrap/bin/hocc/spec.mli b/bootstrap/bin/hocc/spec.mli index 3c7d70d5f..5edf461ce 100644 --- a/bootstrap/bin/hocc/spec.mli +++ b/bootstrap/bin/hocc/spec.mli @@ -16,39 +16,20 @@ type t = { prods: Prods.t; (** Productions. *) - reductions: Reductions.t; - (** Reductions. *) + callbacks: Callbacks.t; + (** Reduction callbacks. *) states: State.t array; (** Generated states. *) } -val init: Conf.algorithm -> resolve:bool -> Io.t -> Parse.hmh -> Io.t * t +val synthetic_name_of_start_name: string -> string +(** [synthetic_name_of_start_name start_name] returns a synthetic symbol name based on [start_name], + e.g. "Start" -> "Start'". *) + +val init: Conf.algorithm -> resolve:bool -> Io.t -> Parse.nonterm_hmh -> Io.t * t (** [init algorithm ~resolve io hmh] creates a specification using the specified [algorithm] on [hmh], with conflicts optionally resolved, and all resulting I/O based on [io]. *) val conflicts: t -> uns (** [conflicts t] returns the number of grammar conflicts in [t]. *) - -val to_txt: Conf.t -> Io.t -> t -> Io.t -(** [to_txt conf io t] integrates a text representation of [t] into [io]. *) - -val to_html: Conf.t -> Io.t -> t -> Io.t -(** [to_html conf io t] integrates an html representation of [t] into [io]. *) - -val to_hocc: Io.t -> t -> Io.t -(** [to_hocc conf io t] integrates a hocc representation of [t]'s grammar into [io]. States are - omitted since they have no hocc representation. *) - -val to_hmi: Conf.t -> Parse.hmhi -> Io.t -> t -> Io.t -(** [to_hmi conf hmhi io t] integrates a Hemlock interface (.hmi) representation of [t] into [io]. -*) - -val to_hm: Conf.t -> Parse.hmh -> Io.t -> t -> Io.t -(** [to_hm conf hmh io t] integrates a Hemlock (.hm) representation of [t] into [io]. *) - -val to_mli: Conf.t -> Parse.hmhi -> Io.t -> t -> Io.t -(** [to_mli conf hmhi io t] integrates an OCaml interface (.mli) representation of [t] into [io]. *) - -val to_ml: Conf.t -> Parse.hmh -> Io.t -> t -> Io.t -(** [to_ml conf hmh io t] integrates an OCaml (.ml) representation of [t] into [io]. *) diff --git a/bootstrap/bin/hocc/state.ml b/bootstrap/bin/hocc/state.ml index 1504f9f53..06102ea65 100644 --- a/bootstrap/bin/hocc/state.ml +++ b/bootstrap/bin/hocc/state.ml @@ -259,6 +259,9 @@ let index {statenub={lr1itemsetclosure={index; _}; _}; _} = let is_start {statenub={lr1itemsetclosure={kernel; _}; _}; _} = Lr1Itemset.is_start kernel +let start_symbol_index {statenub={lr1itemsetclosure={kernel; _}; _}; _} = + Lr1Itemset.start_symbol_index kernel + let has_pseudo_end_conflict {actions; _} = match Ordmap.mem Symbol.pseudo_end.index actions, Ordmap.length actions with | false, _ diff --git a/bootstrap/bin/hocc/state.mli b/bootstrap/bin/hocc/state.mli index 608749ba0..fe82e9633 100644 --- a/bootstrap/bin/hocc/state.mli +++ b/bootstrap/bin/hocc/state.mli @@ -59,6 +59,10 @@ val index: t -> Index.t val is_start: t -> bool (** [is_start t] returns true if [t] is a start state. *) +val start_symbol_index: t -> Symbol.Index.t +(** [start_symbol_index t] returns the start symbol index of [t], where [t] must be a start state. +*) + val has_pseudo_end_conflict: t -> bool (** [has_pseudo_end_conflict t] returns true if the state conflicts on the pseudo-end (⊥) symbol. *) diff --git a/bootstrap/bin/hocc/symbol.ml b/bootstrap/bin/hocc/symbol.ml index 31ebadd9c..4a2a3e056 100644 --- a/bootstrap/bin/hocc/symbol.ml +++ b/bootstrap/bin/hocc/symbol.ml @@ -3,8 +3,8 @@ open! Basis.Rudiments module T = struct type stmt = - | Token of Parse.token - | Nonterm of Parse.nonterm + | Token of Parse.nonterm_token + | Nonterm of Parse.nonterm_nonterm let pp_stmt stmt formatter = match stmt with @@ -69,12 +69,13 @@ let init_token ~index ~name ~qtype ~prec ~stmt ~alias = let follow = Ordset.empty (module Index) in {index; name; qtype; prec; stmt; alias; start; prods; first; follow} -let init_implicit ~index ~name ~alias = - init_token ~index ~name ~qtype:QualifiedType.implicit ~prec:None ~stmt:None ~alias:(Some alias) +let init_synthetic_token ~index ~name ~alias = + init_token ~index ~name ~qtype:QualifiedType.synthetic_implicit ~prec:None ~stmt:None + ~alias:(Some alias) -let epsilon = init_implicit ~index:0L ~name:"EPSILON" ~alias:"ε" +let epsilon = init_synthetic_token ~index:0L ~name:"EPSILON" ~alias:"ε" -let pseudo_end = init_implicit ~index:1L ~name:"PSEUDO_END" ~alias:"⊥" +let pseudo_end = init_synthetic_token ~index:1L ~name:"PSEUDO_END" ~alias:"⊥" let init_nonterm ~index ~name ~qtype ~prec ~stmt ~start ~prods = let stmt = match stmt with diff --git a/bootstrap/bin/hocc/symbol.mli b/bootstrap/bin/hocc/symbol.mli index 247263368..932acabfd 100644 --- a/bootstrap/bin/hocc/symbol.mli +++ b/bootstrap/bin/hocc/symbol.mli @@ -5,8 +5,8 @@ open! Basis.Rudiments (** Declaration AST. *) type stmt = - | Token of Parse.token - | Nonterm of Parse.nonterm + | Token of Parse.nonterm_token + | Nonterm of Parse.nonterm_nonterm module Index = SymbolIndex type t = { @@ -50,11 +50,12 @@ val pseudo_end: t (** [pseudo_end] returns a pseudo-end (⊥) symbol. *) val init_token: index:Index.t -> name:string -> qtype:QualifiedType.t -> prec:Prec.t option - -> stmt:Parse.token option -> alias:string option -> t + -> stmt:Parse.nonterm_token option -> alias:string option -> t (** Used only by [Symbols.insert_token]. *) val init_nonterm: index:Index.t -> name:string -> qtype:QualifiedType.t -> prec:Prec.t option - -> stmt:Parse.nonterm option -> start:bool -> prods:(Prod.t, Prod.cmper_witness) Ordset.t -> t + -> stmt:Parse.nonterm_nonterm option -> start:bool -> prods:(Prod.t, Prod.cmper_witness) Ordset.t + -> t (** Used only by [Symbols.insert_nonterm]. *) val is_token: t -> bool diff --git a/bootstrap/bin/hocc/symbols.ml b/bootstrap/bin/hocc/symbols.ml index 0a589c53a..81c043a55 100644 --- a/bootstrap/bin/hocc/symbols.ml +++ b/bootstrap/bin/hocc/symbols.ml @@ -43,6 +43,9 @@ let info_of_alias alias ({aliases; tokens; _} as t) = | None -> None | Some symbol_index -> info_of_name Symbol.((Ordmap.get_hlt symbol_index tokens).name) t +let info_of_alias_hlt alias t = + Option.value_hlt (info_of_alias alias t) + let insert_token ~name ~qtype ~prec ~stmt ~alias ({infos; names; aliases; symbols; tokens; _} as t) = let index = Map.length infos in diff --git a/bootstrap/bin/hocc/symbols.mli b/bootstrap/bin/hocc/symbols.mli index 5276af9ce..33cb4475e 100644 --- a/bootstrap/bin/hocc/symbols.mli +++ b/bootstrap/bin/hocc/symbols.mli @@ -16,8 +16,8 @@ type info = { (** Optional token alias. *) qtype: QualifiedType.t; - (** Qualified type, e.g. [Implicit] for [token SOME_TOKEN], or [Explicit {module_:Zint; type:t}] - for [token INT of Zint.t. *) + (** Qualified type, e.g. [explicit_opt=None] for [token SOME_TOKEN], or [explicit_opt=Some + {module_:Zint; type:t}] for [token INT of Zint.t. *) } type t @@ -26,7 +26,7 @@ val empty: t (** [empty] returns an empty set of symbols. *) val insert_token: name:string -> qtype:QualifiedType.t -> prec:Prec.t option - -> stmt:Parse.token option -> alias:string option -> t -> t + -> stmt:Parse.nonterm_token option -> alias:string option -> t -> t (** [insert_token ~name ~qtype ~prec ~stmt ~alias t] creates a token [Symbol.t] with unique index and returns a new [t] with the symbol inserted. *) @@ -34,8 +34,8 @@ val insert_nonterm_info: name:string -> qtype:QualifiedType.t -> t -> t (** [insert_nonterm_info ~name ~qtype t] creates a non-terminal [info] and returns a new [t] with the info inserted. This is a precursor to a subsequent [insert_nonterm] call. *) -val insert_nonterm: name:string -> prec:Prec.t option -> stmt:Parse.nonterm option -> start:bool - -> prods:(Prod.t, Prod.cmper_witness) Ordset.t -> t -> t +val insert_nonterm: name:string -> prec:Prec.t option -> stmt:Parse.nonterm_nonterm option + -> start:bool -> prods:(Prod.t, Prod.cmper_witness) Ordset.t -> t -> t (** [insert_token ~name ~prec ~stmt ~start ~prods t] creates a non-terminal [Symbol.t] with unique index and returns a new [t] with the symbol inserted. *) @@ -56,6 +56,10 @@ val info_of_alias: string -> t -> info option (** [info_of_alias alias t] returns [Some info] if a symbol with the specified [alias] exists, [None] otherwise. Note that names and aliases are in separate namespaces. *) +val info_of_alias_hlt: string -> t -> info +(** [info_of_alias alias t] returns [Some info] if a symbol with the specified [alias] exists, halts + otherwise. Note that names and aliases are in separate namespaces. *) + val symbol_index_of_name: string -> t -> Symbol.Index.t option (** [symbol_index_of_name name t] returns [Some index] if a symbol with the specified [name] exists, [None] otherwise. *) diff --git a/bootstrap/test/hocc/Binding_error.expected b/bootstrap/test/hocc/Binding_error.expected new file mode 100644 index 000000000..6eb8a6ba7 --- /dev/null +++ b/bootstrap/test/hocc/Binding_error.expected @@ -0,0 +1,3 @@ +hocc: Parsing "./Binding_error.hmh" +hocc: Generating LR(1) specification +hocc: At ["./Binding_error.hmh":8:8.."./Binding_error.hmh":8:10): Cannot bind to empty symbol variant: t:T diff --git a/bootstrap/test/hocc/Binding_error.hmh b/bootstrap/test/hocc/Binding_error.hmh new file mode 100644 index 000000000..37c550a99 --- /dev/null +++ b/bootstrap/test/hocc/Binding_error.hmh @@ -0,0 +1,8 @@ +hocc + token T + token U of Unit.t + token EOI + start S of Token.t ::= + | T EOI -> T + | u:U EOI -> U u + | t:T U EOI -> U t # Invalid binding; `T` has no payload. diff --git a/bootstrap/test/hocc/Example.expected b/bootstrap/test/hocc/Example.expected index 2842126a5..0dc3dd540 100644 --- a/bootstrap/test/hocc/Example.expected +++ b/bootstrap/test/hocc/Example.expected @@ -11,3 +11,5 @@ hocc: 0 remergeable states hocc: Searching for unused precedences/tokens/non-terminals/productions hocc: Generating text report hocc: Writing "./hocc/Example.txt" +hocc: Writing "./Example.hmi" +hocc: Writing "./Example.hm" diff --git a/bootstrap/test/hocc/Example.expected.hm b/bootstrap/test/hocc/Example.expected.hm new file mode 100644 index 000000000..289c98ad7 --- /dev/null +++ b/bootstrap/test/hocc/Example.expected.hm @@ -0,0 +1,1408 @@ +# This file was generated by `hocc` based on "Example.hmh" +[:"./Example.hmh":1]open import Basis + +# Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. +include [:]{ + Spec = { + Algorithm = { + T = { + type t: t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + index = function + | Lr1 -> 0 + | Ielr1 -> 1 + | Pgm1 -> 2 + | Lalr1 -> 3 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + algorithm = Algorithm.Lr1 + + Assoc = { + T = { + type t: t = + | Left + | Right + + index = function + | Left -> 0 + | Right -> 1 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Left -> "Left" + | Right -> "Right" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + Prec = { + T = { + type t: t = { + index: uns + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness + } + + index {index; _} = + index + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Assoc.pp^)=(^assoc + ^); %f(^Ordset.pp^)=(^doms + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + } + + precs = [| + Prec.init ~index:0 ~name:"mul" ~assoc:(Some Left) ~doms:(Ordset.empty Uns) + Prec.init ~index:1 ~name:"add" ~assoc:(Some Left) ~doms:(Ordset.singleton Uns 0) + |] + + Prod = { + T = { + type t: t = { + index: uns + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %u=(^lhs_index + ^); %f(^Array.pp Uns.pp^)=(^rhs_indexes + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %u=(^callback + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + } + + prods = [| + Prod.init ~index:0 ~lhs_index:8 ~rhs_indexes:[|2|] + ~prec:None ~callback:0 + Prod.init ~index:1 ~lhs_index:8 ~rhs_indexes:[|3|] + ~prec:None ~callback:1 + Prod.init ~index:2 ~lhs_index:9 ~rhs_indexes:[|4|] + ~prec:None ~callback:2 + Prod.init ~index:3 ~lhs_index:9 ~rhs_indexes:[|5|] + ~prec:None ~callback:3 + Prod.init ~index:4 ~lhs_index:10 ~rhs_indexes:[|10; 8; 10|] + ~prec:(Some (Array.get 0 precs)) ~callback:4 + Prod.init ~index:5 ~lhs_index:10 ~rhs_indexes:[|10; 9; 10|] + ~prec:(Some (Array.get 1 precs)) ~callback:5 + Prod.init ~index:6 ~lhs_index:10 ~rhs_indexes:[|6|] + ~prec:None ~callback:6 + Prod.init ~index:7 ~lhs_index:11 ~rhs_indexes:[|10; 7|] + ~prec:None ~callback:7 + Prod.init ~index:8 ~lhs_index:12 ~rhs_indexes:[|11; 1|] + ~prec:None ~callback:8 + |] + + Symbol = { + T = { + type t: t = { + index: uns + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %f(^Option.pp String.pp^)=(^alias + ^); %b=(^start + ^); %f(^Ordset.pp^)=(^prods + ^); %f(^Ordset.pp^)=(^first + ^); %f(^Ordset.pp^)=(^follow + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + } + + symbols = [| + Symbol.init ~index:0 ~name:"EPSILON" + ~prec:None ~alias:(Some "ε") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 0) + ~follow:(Ordset.empty Uns) + Symbol.init ~index:1 ~name:"PSEUDO_END" + ~prec:None ~alias:(Some "⊥") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 1) + ~follow:(Ordset.singleton Uns 0) + Symbol.init ~index:2 ~name:"STAR" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "*") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 2) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:3 ~name:"SLASH" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "/") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 3) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:4 ~name:"PLUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "+") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 4) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:5 ~name:"MINUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "-") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 5) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:6 ~name:"INT" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:7 ~name:"EOI" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 7) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:8 ~name:"MulOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 0 prods + Array.get 1 prods + ]) ~first:(Ordset.of_list Uns [2; 3]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:9 ~name:"AddOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 2 prods + Array.get 3 prods + ]) ~first:(Ordset.of_list Uns [4; 5]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:10 ~name:"Expr" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 4 prods + Array.get 5 prods + Array.get 6 prods + ]) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:11 ~name:"Answer" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 7 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:12 ~name:"Answer'" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 8 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 0) + |] + + Lr0Item = { + T = { + type t: t = { + prod: Prod.t + dot: uns + } + + hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + pp {prod; dot} formatter = + formatter |> Fmt.fmt "{%f(^Prod.pp^)=(^prod^); %u=(^dot^)}" + } + include T + include Identifiable.Make(T) + + init ~prod ~dot = + {prod; dot} + } + + Lr1Item = { + T = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{%f(^Lr0Item.pp^)=(^lr0item^); %f(^Ordset.pp^)=(^follow^)}" + } + include T + include Identifiable.Make(T) + + init ~lr0item ~follow = + {lr0item; follow} + } + + Lr1Itemset = { + T = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + hash_fold = Ordmap.hash_fold Lr1Item.hash_fold + cmp = Ordmap.cmp Lr1Item.cmp + pp = Ordmap.pp Lr1Item.pp + } + include T + include Identifiable.Make(T) + + empty = Ordmap.empty Lr0Item + + init = Ordmap.of_alist Lr0Item + } + + Lr1ItemsetClosure = { + T = { + type t: t = { + index: uns + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + hash_fold {index; _} state = + state |> Uns.hash_fold index + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %f(^Lr1Itemset.pp^)=(^kernel + ^); %f(^Lr1Itemset.pp^)=(^added + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~kernel ~added = + {index; kernel; added} + } + + Action = { + T = { + type t: t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + + arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + to_string = function + | ShiftPrefix state_index -> "ShiftPrefix %u(^state_index^)" + | ShiftAccept state_index -> "ShiftAccept %u(^state_index^)" + | Reduce prod_index -> "Reduce %u(^prod_index^)" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt + "{%f(^Lr1ItemsetClosure.pp^)=(^lr1ItemsetClosure + ^); %f(^Map.pp Action.pp^)=(^actions + ^); %f(^Map.pp Uns.pp^)=(^gotos + ^)}" + } + include T + include Identifiable.Make(T) + + init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + } + + states = [| + (* 0 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:0 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 2 + 11, 3 + ] + (* 1 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:1 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 6 + 3, Action.Reduce 6 + 4, Action.Reduce 6 + 5, Action.Reduce 6 + 7, Action.Reduce 6 + ] + ~gotos: + Map.empty Uns + (* 2 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:2 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.ShiftPrefix 6 + 5, Action.ShiftPrefix 7 + 7, Action.ShiftAccept 8 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 3 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:3 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.ShiftPrefix 11 + ] + ~gotos: + Map.empty Uns + (* 4 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:4 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 0 + ] + ~gotos: + Map.empty Uns + (* 5 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:5 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 1 + ] + ~gotos: + Map.empty Uns + (* 6 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:6 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 2 + ] + ~gotos: + Map.empty Uns + (* 7 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:7 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 3 + ] + ~gotos: + Map.empty Uns + (* 8 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:8 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.Reduce 7 + ] + ~gotos: + Map.empty Uns + (* 9 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:9 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 12 + ] + (* 10 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:10 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 13 + ] + (* 11 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:11 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 0, Action.Reduce 8 + ] + ~gotos: + Map.empty Uns + (* 12 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:12 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 4 + 3, Action.Reduce 4 + 4, Action.Reduce 4 + 5, Action.Reduce 4 + 7, Action.Reduce 4 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 13 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:13 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.Reduce 5 + 5, Action.Reduce 5 + 7, Action.Reduce 5 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + |] + } + + Token = { + T = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + index = function + | EPSILON -> 0 + | PSEUDO_END -> 1 + | STAR -> 2 + | SLASH -> 3 + | PLUS -> 4 + | MINUS -> 5 + | INT _ -> 6 + | EOI -> 7 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Nonterm = { + T = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + index = function + | MulOp _ -> 8 + | AddOp _ -> 9 + | Expr _ -> 10 + | Answer _ -> 11 + | Answer' _ -> 12 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Symbol = { + T = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = uns + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + spec t = + Array.get t Spec.states + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + init state_index = + state_index + } + + Stack = { + Elm = { + T = { + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter |> Fmt.fmt "{%f(^Symbol.pp^)=(^symbol^); %f(^State.pp^)=(^state^)}" + } + include T + include Identifiable.Make(T) + + init ~symbol ~state = + {symbol; state} + } + + type t: t = list Elm.t + + fmt ?(alt=false) ?(width=0) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + pp t formatter = + formatter |> fmt t + + Reduction = { + T = { + type stack: stack = t + type t: t = uns + type callback: callback = stack -> Symbol.t * stack + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + callbacks = [| + (* 0 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ________________________________________________________________________________ + [:"./Example.hmh":9:4+11]STAR[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 1 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ________________________________________________________________________________ + [:"./Example.hmh":10:4+11]SLASH[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 2 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ________________________________________________________________________________ + [:"./Example.hmh":16:4+11]PLUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 3 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ________________________________________________________________________________ + [:"./Example.hmh":17:4+11]MINUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 4 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (MulOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example.hmh":22:8+0]match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 5 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (AddOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example.hmh":27:8+0]match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 6 *) function + | {symbol=Symbol.Token (INT x); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example.hmh":31:4+13]x[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 7 *) function + | _ + :: {symbol=Symbol.Nonterm (Expr e); _} + :: tl -> Symbol.Nonterm (Answer ( + # ________________________________________________________________________________ + [:"./Example.hmh":35:4+18]e[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 8 *) fn _stack -> not_reached () + |] + + callback t = + Array.get t callbacks + + init callback_index = + callback_index + } + + shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + # goto: Symbol.t -> t -> t + goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol + let Spec.State.{gotos; _} = Array.get state Spec.states + let state' = Map.get_hlt symbol_index gotos |> State.init + shift ~symbol ~state:state' t + + reduce ~reduction t = + let callback = Reduction.callback reduction + let symbol, t' = callback t + goto symbol t' + } + + Status = { + T = { + type t: t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + | Prefix -> 3 + | Accept _ -> 4 + | Reject _ -> 5 + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> fn hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + + let cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Reduce (token0, reduction0), Reduce (token1, reduction1) -> + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + | Gt -> Gt + + pp t formatter = + formatter + |> fn formatter -> + match t with + | ShiftPrefix (token, state) -> + formatter + |> Fmt.fmt "ShiftPrefix (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | ShiftAccept (token, state) -> + formatter + |> Fmt.fmt "ShiftAccept (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | Reduce reduction -> + formatter + |> Fmt.fmt "Reduce (%f(^Token.pp^)(^token^), %f(^Stack.Reduction.pp + ^)(^reduction^))" + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept %f(^Nonterm.pp^)(^nonterm^)" + | Reject token -> formatter |> Fmt.fmt "Reject %f(^Token.pp^)(^token^)" + } + include T + include Identifiable.Make(T) + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi = { + stack=[{ + symbol=Token Token.EPSILON + state=State.init 0 + }] + status=Prefix + } + } + } + + feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> + let token_index = Token.index token + let Spec.State.{actions; _} = Array.get state Spec.states + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + Status.Reduce (token, reduction) + | None -> Status.Reject token + {t with status} + | _ -> not_reached () + + step {stack; status} = + let open Status + match status with + | ShiftPrefix (token, state) -> {stack=shift token state stack; status=Prefix} + | ShiftAccept (token, state) -> + # Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. + let stack = shift token state stack + let pseudo_end_index = Token.index Token.PSEUDO_END + let Spec.State.{actions; _} = Array.get state Spec.states + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + let stack = Stack.reduce ~reduction stack + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + | _ -> not_reached () + | Reduce (token, reduction) -> + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + | _ -> not_reached () + + # walk: t -> t + rec walk ({status; _} as t) = + let open Status + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + }[:"./Example.hmh":35:0+23] + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with + | Prefix -> false + | Accept _ + | Error _ -> true + | _ -> not_reached () + parser', done + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example.expected.hmi b/bootstrap/test/hocc/Example.expected.hmi new file mode 100644 index 000000000..4351059c8 --- /dev/null +++ b/bootstrap/test/hocc/Example.expected.hmi @@ -0,0 +1,260 @@ +# This file was generated by `hocc` based on "Example.hmhi" +[:"./Example.hmhi":1]open import Basis + +# Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a +# module signature. +include [:]{ + Spec = { + Algorithm = { + type t: t = + | Lr1 [@doc "LR(1) algorithm."] + | Ielr1 [@doc "IELR(1) algorithm."] + | Pgm1 [@doc "PGM(1) algorithm."] + | Lalr1 [@doc "LALR(1) algorithm."] + + include IdentifiableIntf.S with type t := t + } + + algorithm: Algorithm.t + [@@doc "Algorithm used to generate parser."] + + Assoc = { + type t: t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness # Indices in `precs` array of dominator + # precedences. + } + + include IdentifiableIntf.S with type t := t + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Prod = { + type t: t = { + index: uns # Index in `prods` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns # Index of reduction callback in `Stack.Reduction.callbacks`. + } + + include IdentifiableIntf.S with type t := t + } + + prods: array Prod.t + [@@doc "Array of productions, where each element's `index` field corresponds to the + element's array index."] + + Symbol = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness # empty ≡ token + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + symbols: array Symbol.t + [@@doc "Array of symbols, where each element's `index` field corresponds to the element's + array index."] + + Lr0Item = { + type t: t = { + prod: Prod.t + dot: uns + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Item = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Itemset = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + include IdentifiableIntf.S with type t := t + } + + Lr1ItemsetClosure = { + type t: t = { + index: uns # Index of corresponding `State.t` in `states` array. + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + include IdentifiableIntf.S with type t := t + } + + Action = { + type t: t = + | ShiftPrefix of uns # `states` index. + | ShiftAccept of uns # `states` index. + | Reduce of uns # `prods` index. + + include IdentifiableIntf.S with type t := t + } + + State = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `lr1ItemsetClosure.index` field + corresponds to the element's array index."] + } + + Token = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Nonterm = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + State = { + type t: t = uns + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.State.t + } + + Stack = { + module Elm : sig + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t: t = Elm.t list + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + fmt >e: ?alt:bool -> ?width:uns -> t -> Fmt.Formatter e >e-> Fmt.Formatter e + + Reduction = { + type stack: stack = t + type t: t + type callback: callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + callbacks: array callback + [@@doc "Array of reduction callback functions containing embedded parser code."] + + callback: t -> callback + } + + shift: symbol:Symbol.t -> state:State.t -> t -> t + [@@doc "Perform a shift."] + + reduce: reduction:Reduction.t -> t -> t + [@@doc "Perform a reduction."] + } + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. + | Reject of Token.t # Syntax error due to unexpected token. + + include IdentifiableIntf.S with type t := t + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi: t + } + } + + feed: Token.t -> t -> t + [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, + `Reduce`, `Reject`}. `t.status` must be `Prefix`."] + + step: t -> t + [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must + be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."] + + next: Token.t -> t -> t + [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a + result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."] + }[:"./Example.hmhi":5:0+12] + +calculate: string -> zint + [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers + and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."] diff --git a/bootstrap/test/hocc/Example.expected.txt b/bootstrap/test/hocc/Example.expected.txt index c5c6538ea..048d2651a 100644 --- a/bootstrap/test/hocc/Example.expected.txt +++ b/bootstrap/test/hocc/Example.expected.txt @@ -53,7 +53,7 @@ Non-terminals Follow: {"⊥"} Productions Answer ::= Expr EOI - start Answer' + start Answer' of Zint.t First: {INT} Follow: {"ε"} Productions diff --git a/bootstrap/test/hocc/Example.hmh b/bootstrap/test/hocc/Example.hmh index c9c1f7a7e..ee39a67cb 100644 --- a/bootstrap/test/hocc/Example.hmh +++ b/bootstrap/test/hocc/Example.hmh @@ -20,12 +20,14 @@ include hocc nonterm Expr of Zint.t ::= | e0:Expr op:MulOp e1:Expr prec mul -> match op with - | MulOp STAR -> Zint.(e0 * e1) - | MulOp SLASH -> Zint.(e0 / e1) + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () | e0:Expr op:AddOp e1:Expr prec add -> match op with - | AddOp PLUS -> Zint.(e0 + e1) - | AddOp MINUS -> Zint.(e0 - e1) + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () | x:INT -> x token EOI @@ -34,8 +36,8 @@ include hocc # Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. tokenize s = - s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' ')) - |> List.rev_filter ~f:(fn s -> String.length s <> 0) + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) |> List.rev_map ~f:fn s -> let open Token match s with @@ -49,15 +51,16 @@ tokenize s = # Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". calculate s = - List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> - let parser' = Start.Answer.next tok parser - let done = match status parser' with + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with | Prefix -> false | Accept _ | Error _ -> true + | _ -> not_reached () parser', done - |> - function - | Accept answer -> answer - | Prefix _ -> halt "Partial input" - | Error _ -> halt "Parse error" + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example.hmhi b/bootstrap/test/hocc/Example.hmhi index b49400610..476978777 100644 --- a/bootstrap/test/hocc/Example.hmhi +++ b/bootstrap/test/hocc/Example.hmhi @@ -4,6 +4,6 @@ open import Basis # module signature. include hocc -calulate: string -> zint +calculate: string -> zint [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."] diff --git a/bootstrap/test/hocc/Example_b.expected b/bootstrap/test/hocc/Example_b.expected new file mode 100644 index 000000000..95c5cc3bb --- /dev/null +++ b/bootstrap/test/hocc/Example_b.expected @@ -0,0 +1,13 @@ +hocc: Parsing "./Example_b.hmhi" +hocc: Parsing "./Example_b.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 8 tokens, 5 non-terminals, 9 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++ +hocc: Generating 14 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Writing "./Example_b.hmi" +hocc: Writing "./Example_b.hm" diff --git a/bootstrap/test/hocc/Example_b.expected.hm b/bootstrap/test/hocc/Example_b.expected.hm new file mode 100644 index 000000000..b13b4083a --- /dev/null +++ b/bootstrap/test/hocc/Example_b.expected.hm @@ -0,0 +1,1416 @@ +# This file was generated by `hocc` based on "Example_b.hmh" +[:"./Example_b.hmh":1]open import Basis + +# The `hocc` keyword is on a continued line. Indentation should remain a multiple of 4. +include + [:]{ + Spec = { + Algorithm = { + T = { + type t: t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + index = function + | Lr1 -> 0 + | Ielr1 -> 1 + | Pgm1 -> 2 + | Lalr1 -> 3 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + algorithm = Algorithm.Lr1 + + Assoc = { + T = { + type t: t = + | Left + | Right + + index = function + | Left -> 0 + | Right -> 1 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Left -> "Left" + | Right -> "Right" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + Prec = { + T = { + type t: t = { + index: uns + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness + } + + index {index; _} = + index + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Assoc.pp^)=(^assoc + ^); %f(^Ordset.pp^)=(^doms + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + } + + precs = [| + Prec.init ~index:0 ~name:"mul" ~assoc:(Some Left) ~doms:(Ordset.empty Uns) + Prec.init ~index:1 ~name:"add" ~assoc:(Some Left) ~doms:(Ordset.singleton Uns 0) + |] + + Prod = { + T = { + type t: t = { + index: uns + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %u=(^lhs_index + ^); %f(^Array.pp Uns.pp^)=(^rhs_indexes + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %u=(^callback + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + } + + prods = [| + Prod.init ~index:0 ~lhs_index:8 ~rhs_indexes:[|2|] + ~prec:None ~callback:0 + Prod.init ~index:1 ~lhs_index:8 ~rhs_indexes:[|3|] + ~prec:None ~callback:1 + Prod.init ~index:2 ~lhs_index:9 ~rhs_indexes:[|4|] + ~prec:None ~callback:2 + Prod.init ~index:3 ~lhs_index:9 ~rhs_indexes:[|5|] + ~prec:None ~callback:3 + Prod.init ~index:4 ~lhs_index:10 ~rhs_indexes:[|10; 8; 10|] + ~prec:(Some (Array.get 0 precs)) ~callback:4 + Prod.init ~index:5 ~lhs_index:10 ~rhs_indexes:[|10; 9; 10|] + ~prec:(Some (Array.get 1 precs)) ~callback:5 + Prod.init ~index:6 ~lhs_index:10 ~rhs_indexes:[|6|] + ~prec:None ~callback:6 + Prod.init ~index:7 ~lhs_index:11 ~rhs_indexes:[|10; 7|] + ~prec:None ~callback:7 + Prod.init ~index:8 ~lhs_index:12 ~rhs_indexes:[|11; 1|] + ~prec:None ~callback:8 + |] + + Symbol = { + T = { + type t: t = { + index: uns + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %f(^Option.pp String.pp^)=(^alias + ^); %b=(^start + ^); %f(^Ordset.pp^)=(^prods + ^); %f(^Ordset.pp^)=(^first + ^); %f(^Ordset.pp^)=(^follow + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + } + + symbols = [| + Symbol.init ~index:0 ~name:"EPSILON" + ~prec:None ~alias:(Some "ε") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 0) + ~follow:(Ordset.empty Uns) + Symbol.init ~index:1 ~name:"PSEUDO_END" + ~prec:None ~alias:(Some "⊥") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 1) + ~follow:(Ordset.singleton Uns 0) + Symbol.init ~index:2 ~name:"STAR" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "*") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 2) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:3 ~name:"SLASH" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "/") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 3) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:4 ~name:"PLUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "+") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 4) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:5 ~name:"MINUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "-") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 5) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:6 ~name:"INT" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:7 ~name:"EOI" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 7) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:8 ~name:"MulOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 0 prods + Array.get 1 prods + ]) ~first:(Ordset.of_list Uns [2; 3]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:9 ~name:"AddOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 2 prods + Array.get 3 prods + ]) ~first:(Ordset.of_list Uns [4; 5]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:10 ~name:"Expr" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 4 prods + Array.get 5 prods + Array.get 6 prods + ]) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:11 ~name:"Answer" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 7 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:12 ~name:"Answer'" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 8 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 0) + |] + + Lr0Item = { + T = { + type t: t = { + prod: Prod.t + dot: uns + } + + hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + pp {prod; dot} formatter = + formatter |> Fmt.fmt "{%f(^Prod.pp^)=(^prod^); %u=(^dot^)}" + } + include T + include Identifiable.Make(T) + + init ~prod ~dot = + {prod; dot} + } + + Lr1Item = { + T = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{%f(^Lr0Item.pp^)=(^lr0item^); %f(^Ordset.pp^)=(^follow^)}" + } + include T + include Identifiable.Make(T) + + init ~lr0item ~follow = + {lr0item; follow} + } + + Lr1Itemset = { + T = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + hash_fold = Ordmap.hash_fold Lr1Item.hash_fold + cmp = Ordmap.cmp Lr1Item.cmp + pp = Ordmap.pp Lr1Item.pp + } + include T + include Identifiable.Make(T) + + empty = Ordmap.empty Lr0Item + + init = Ordmap.of_alist Lr0Item + } + + Lr1ItemsetClosure = { + T = { + type t: t = { + index: uns + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + hash_fold {index; _} state = + state |> Uns.hash_fold index + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %f(^Lr1Itemset.pp^)=(^kernel + ^); %f(^Lr1Itemset.pp^)=(^added + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~kernel ~added = + {index; kernel; added} + } + + Action = { + T = { + type t: t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + + arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + to_string = function + | ShiftPrefix state_index -> "ShiftPrefix %u(^state_index^)" + | ShiftAccept state_index -> "ShiftAccept %u(^state_index^)" + | Reduce prod_index -> "Reduce %u(^prod_index^)" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt + "{%f(^Lr1ItemsetClosure.pp^)=(^lr1ItemsetClosure + ^); %f(^Map.pp Action.pp^)=(^actions + ^); %f(^Map.pp Uns.pp^)=(^gotos + ^)}" + } + include T + include Identifiable.Make(T) + + init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + } + + states = [| + (* 0 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:0 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 2 + 11, 3 + ] + (* 1 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:1 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 6 + 3, Action.Reduce 6 + 4, Action.Reduce 6 + 5, Action.Reduce 6 + 7, Action.Reduce 6 + ] + ~gotos: + Map.empty Uns + (* 2 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:2 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.ShiftPrefix 6 + 5, Action.ShiftPrefix 7 + 7, Action.ShiftAccept 8 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 3 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:3 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.ShiftPrefix 11 + ] + ~gotos: + Map.empty Uns + (* 4 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:4 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 0 + ] + ~gotos: + Map.empty Uns + (* 5 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:5 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 1 + ] + ~gotos: + Map.empty Uns + (* 6 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:6 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 2 + ] + ~gotos: + Map.empty Uns + (* 7 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:7 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 3 + ] + ~gotos: + Map.empty Uns + (* 8 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:8 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.Reduce 7 + ] + ~gotos: + Map.empty Uns + (* 9 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:9 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 12 + ] + (* 10 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:10 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 13 + ] + (* 11 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:11 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 0, Action.Reduce 8 + ] + ~gotos: + Map.empty Uns + (* 12 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:12 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 4 + 3, Action.Reduce 4 + 4, Action.Reduce 4 + 5, Action.Reduce 4 + 7, Action.Reduce 4 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 13 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:13 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.Reduce 5 + 5, Action.Reduce 5 + 7, Action.Reduce 5 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + |] + } + + Token = { + T = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + index = function + | EPSILON -> 0 + | PSEUDO_END -> 1 + | STAR -> 2 + | SLASH -> 3 + | PLUS -> 4 + | MINUS -> 5 + | INT _ -> 6 + | EOI -> 7 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Nonterm = { + T = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + index = function + | MulOp _ -> 8 + | AddOp _ -> 9 + | Expr _ -> 10 + | Answer _ -> 11 + | Answer' _ -> 12 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Symbol = { + T = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = uns + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + spec t = + Array.get t Spec.states + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + init state_index = + state_index + } + + Stack = { + Elm = { + T = { + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter |> Fmt.fmt "{%f(^Symbol.pp^)=(^symbol^); %f(^State.pp^)=(^state^)}" + } + include T + include Identifiable.Make(T) + + init ~symbol ~state = + {symbol; state} + } + + type t: t = list Elm.t + + fmt ?(alt=false) ?(width=0) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + pp t formatter = + formatter |> fmt t + + Reduction = { + T = { + type stack: stack = t + type t: t = uns + type callback: callback = stack -> Symbol.t * stack + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + callbacks = [| + (* 0 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":10:4+11]STAR[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 1 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":11:4+11]SLASH[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 2 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":17:4+11]PLUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 3 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":18:4+11]MINUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 4 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (MulOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":23:8+0]match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 5 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (AddOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":28:8+0]match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 6 *) function + | {symbol=Symbol.Token (INT x); _} + :: tl -> Symbol.Nonterm (Expr ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":32:4+13]x[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 7 *) function + | _ + :: {symbol=Symbol.Nonterm (Expr e); _} + :: tl -> Symbol.Nonterm (Answer ( + # ________________________________________________________________________________ + [:"./Example_b.hmh":36:4+18]e (* A comment that should be excluded from postlude. *) # And another. + # In `hocc` block; exclude from postlude + # In `hocc` block; exclude from postlude + # In `hocc` block; exclude from postlude +[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 8 *) fn _stack -> not_reached () + |] + + callback t = + Array.get t callbacks + + init callback_index = + callback_index + } + + shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + # goto: Symbol.t -> t -> t + goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol + let Spec.State.{gotos; _} = Array.get state Spec.states + let state' = Map.get_hlt symbol_index gotos |> State.init + shift ~symbol ~state:state' t + + reduce ~reduction t = + let callback = Reduction.callback reduction + let symbol, t' = callback t + goto symbol t' + } + + Status = { + T = { + type t: t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + | Prefix -> 3 + | Accept _ -> 4 + | Reject _ -> 5 + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> fn hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + + let cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Reduce (token0, reduction0), Reduce (token1, reduction1) -> + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + | Gt -> Gt + + pp t formatter = + formatter + |> fn formatter -> + match t with + | ShiftPrefix (token, state) -> + formatter + |> Fmt.fmt "ShiftPrefix (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | ShiftAccept (token, state) -> + formatter + |> Fmt.fmt "ShiftAccept (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | Reduce reduction -> + formatter + |> Fmt.fmt "Reduce (%f(^Token.pp^)(^token^), %f(^Stack.Reduction.pp + ^)(^reduction^))" + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept %f(^Nonterm.pp^)(^nonterm^)" + | Reject token -> formatter |> Fmt.fmt "Reject %f(^Token.pp^)(^token^)" + } + include T + include Identifiable.Make(T) + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi = { + stack=[{ + symbol=Token Token.EPSILON + state=State.init 0 + }] + status=Prefix + } + } + } + + feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> + let token_index = Token.index token + let Spec.State.{actions; _} = Array.get state Spec.states + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + Status.Reduce (token, reduction) + | None -> Status.Reject token + {t with status} + | _ -> not_reached () + + step {stack; status} = + let open Status + match status with + | ShiftPrefix (token, state) -> {stack=shift token state stack; status=Prefix} + | ShiftAccept (token, state) -> + # Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. + let stack = shift token state stack + let pseudo_end_index = Token.index Token.PSEUDO_END + let Spec.State.{actions; _} = Array.get state Spec.states + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + let stack = Stack.reduce ~reduction stack + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + | _ -> not_reached () + | Reduce (token, reduction) -> + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + | _ -> not_reached () + + # walk: t -> t + rec walk ({status; _} as t) = + let open Status + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + }[:"./Example_b.hmh":40:0+0] # [First] Outside `hocc` block; include in postlude + # Outside `hocc` block; include in postlude + # Outside `hocc` block; include in postlude +# Outside `hocc` block; include in postlude + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with + | Prefix -> false + | Accept _ + | Error _ -> true + | _ -> not_reached () + parser', done + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example_b.expected.hmi b/bootstrap/test/hocc/Example_b.expected.hmi new file mode 100644 index 000000000..1b3aadc72 --- /dev/null +++ b/bootstrap/test/hocc/Example_b.expected.hmi @@ -0,0 +1,258 @@ +# This file was generated by `hocc` based on "Example_b.hmhi" +[:"./Example_b.hmhi":1]open import Basis + +# The `hocc` keyword is on a continued line. Indentation should remain a multiple of 4. +include + [:]{ + Spec = { + Algorithm = { + type t: t = + | Lr1 [@doc "LR(1) algorithm."] + | Ielr1 [@doc "IELR(1) algorithm."] + | Pgm1 [@doc "PGM(1) algorithm."] + | Lalr1 [@doc "LALR(1) algorithm."] + + include IdentifiableIntf.S with type t := t + } + + algorithm: Algorithm.t + [@@doc "Algorithm used to generate parser."] + + Assoc = { + type t: t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness # Indices in `precs` array of dominator + # precedences. + } + + include IdentifiableIntf.S with type t := t + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Prod = { + type t: t = { + index: uns # Index in `prods` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns # Index of reduction callback in `Stack.Reduction.callbacks`. + } + + include IdentifiableIntf.S with type t := t + } + + prods: array Prod.t + [@@doc "Array of productions, where each element's `index` field corresponds to the + element's array index."] + + Symbol = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness # empty ≡ token + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + symbols: array Symbol.t + [@@doc "Array of symbols, where each element's `index` field corresponds to the element's + array index."] + + Lr0Item = { + type t: t = { + prod: Prod.t + dot: uns + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Item = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Itemset = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + include IdentifiableIntf.S with type t := t + } + + Lr1ItemsetClosure = { + type t: t = { + index: uns # Index of corresponding `State.t` in `states` array. + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + include IdentifiableIntf.S with type t := t + } + + Action = { + type t: t = + | ShiftPrefix of uns # `states` index. + | ShiftAccept of uns # `states` index. + | Reduce of uns # `prods` index. + + include IdentifiableIntf.S with type t := t + } + + State = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `lr1ItemsetClosure.index` field + corresponds to the element's array index."] + } + + Token = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Nonterm = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + State = { + type t: t = uns + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.State.t + } + + Stack = { + module Elm : sig + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t: t = Elm.t list + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + fmt >e: ?alt:bool -> ?width:uns -> t -> Fmt.Formatter e >e-> Fmt.Formatter e + + Reduction = { + type stack: stack = t + type t: t + type callback: callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + callbacks: array callback + [@@doc "Array of reduction callback functions containing embedded parser code."] + + callback: t -> callback + } + + shift: symbol:Symbol.t -> state:State.t -> t -> t + [@@doc "Perform a shift."] + + reduce: reduction:Reduction.t -> t -> t + [@@doc "Perform a reduction."] + } + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. + | Reject of Token.t # Syntax error due to unexpected token. + + include IdentifiableIntf.S with type t := t + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi: t + } + } + + feed: Token.t -> t -> t + [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, + `Reduce`, `Reject`}. `t.status` must be `Prefix`."] + + step: t -> t + [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must + be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."] + + next: Token.t -> t -> t + [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a + result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."] + }[:"./Example_b.hmhi":5:0+6] + +calculate: string -> zint diff --git a/bootstrap/test/hocc/Example_b.hmh b/bootstrap/test/hocc/Example_b.hmh new file mode 100644 index 000000000..6dbca2cfb --- /dev/null +++ b/bootstrap/test/hocc/Example_b.hmh @@ -0,0 +1,74 @@ +open import Basis + +# The `hocc` keyword is on a continued line. Indentation should remain a multiple of 4. +include + hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr prec mul -> + match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e (* A comment that should be excluded from postlude. *) # And another. + # In `hocc` block; exclude from postlude + # In `hocc` block; exclude from postlude + # In `hocc` block; exclude from postlude + # [First] Outside `hocc` block; include in postlude + # Outside `hocc` block; include in postlude + # Outside `hocc` block; include in postlude +# Outside `hocc` block; include in postlude + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with + | Prefix -> false + | Accept _ + | Error _ -> true + | _ -> not_reached () + parser', done + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example_b.hmhi b/bootstrap/test/hocc/Example_b.hmhi new file mode 100644 index 000000000..9e1bd8488 --- /dev/null +++ b/bootstrap/test/hocc/Example_b.hmhi @@ -0,0 +1,7 @@ +open import Basis + +# The `hocc` keyword is on a continued line. Indentation should remain a multiple of 4. +include + hocc + +calculate: string -> zint diff --git a/bootstrap/test/hocc/Example_c.expected b/bootstrap/test/hocc/Example_c.expected new file mode 100644 index 000000000..153cdb127 --- /dev/null +++ b/bootstrap/test/hocc/Example_c.expected @@ -0,0 +1,13 @@ +hocc: Parsing "./Example_c.hmhi" +hocc: Parsing "./Example_c.hmh" +hocc: Generating LR(1) specification +hocc: 2 precedences, 8 tokens, 5 non-terminals, 9 productions +hocc: LR(1) item set compatibility: lr1 +hocc: Generating LR(1) item set closures (+^.=add/split/merge)+++++++++++++ +hocc: Generating 14 LR(1) states +hocc: 0 unresolvable conflicts in 0 states +hocc: 0 unreachable states +hocc: 0 remergeable states +hocc: Searching for unused precedences/tokens/non-terminals/productions +hocc: Writing "./Example_c.hmi" +hocc: Writing "./Example_c.hm" diff --git a/bootstrap/test/hocc/Example_c.expected.hm b/bootstrap/test/hocc/Example_c.expected.hm new file mode 100644 index 000000000..2d067ad1b --- /dev/null +++ b/bootstrap/test/hocc/Example_c.expected.hm @@ -0,0 +1,1410 @@ +# This file was generated by `hocc` based on "Example_c.hmh" +[:"./Example_c.hmh":1]open import Basis + +Parser = { + # Better written as `Parser = hocc`, but this is an indentation test. + include [:]{ + Spec = { + Algorithm = { + T = { + type t: t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + index = function + | Lr1 -> 0 + | Ielr1 -> 1 + | Pgm1 -> 2 + | Lalr1 -> 3 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + algorithm = Algorithm.Lr1 + + Assoc = { + T = { + type t: t = + | Left + | Right + + index = function + | Left -> 0 + | Right -> 1 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + to_string = function + | Left -> "Left" + | Right -> "Right" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + Prec = { + T = { + type t: t = { + index: uns + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness + } + + index {index; _} = + index + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Assoc.pp^)=(^assoc + ^); %f(^Ordset.pp^)=(^doms + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + } + + precs = [| + Prec.init ~index:0 ~name:"mul" ~assoc:(Some Left) ~doms:(Ordset.empty Uns) + Prec.init ~index:1 ~name:"add" ~assoc:(Some Left) ~doms:(Ordset.singleton Uns 0) + |] + + Prod = { + T = { + type t: t = { + index: uns + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %u=(^lhs_index + ^); %f(^Array.pp Uns.pp^)=(^rhs_indexes + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %u=(^callback + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + } + + prods = [| + Prod.init ~index:0 ~lhs_index:8 ~rhs_indexes:[|2|] + ~prec:None ~callback:0 + Prod.init ~index:1 ~lhs_index:8 ~rhs_indexes:[|3|] + ~prec:None ~callback:1 + Prod.init ~index:2 ~lhs_index:9 ~rhs_indexes:[|4|] + ~prec:None ~callback:2 + Prod.init ~index:3 ~lhs_index:9 ~rhs_indexes:[|5|] + ~prec:None ~callback:3 + Prod.init ~index:4 ~lhs_index:10 ~rhs_indexes:[|10; 8; 10|] + ~prec:(Some (Array.get 0 precs)) ~callback:4 + Prod.init ~index:5 ~lhs_index:10 ~rhs_indexes:[|10; 9; 10|] + ~prec:(Some (Array.get 1 precs)) ~callback:5 + Prod.init ~index:6 ~lhs_index:10 ~rhs_indexes:[|6|] + ~prec:None ~callback:6 + Prod.init ~index:7 ~lhs_index:11 ~rhs_indexes:[|10; 7|] + ~prec:None ~callback:7 + Prod.init ~index:8 ~lhs_index:12 ~rhs_indexes:[|11; 1|] + ~prec:None ~callback:8 + |] + + Symbol = { + T = { + type t: t = { + index: uns + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {index; _} state = + Uns.hash_fold index state + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %s=(^name + ^); %f(^Option.pp Prec.pp^)=(^prec + ^); %f(^Option.pp String.pp^)=(^alias + ^); %b=(^start + ^); %f(^Ordset.pp^)=(^prods + ^); %f(^Ordset.pp^)=(^first + ^); %f(^Ordset.pp^)=(^follow + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + } + + symbols = [| + Symbol.init ~index:0 ~name:"EPSILON" + ~prec:None ~alias:(Some "ε") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 0) + ~follow:(Ordset.empty Uns) + Symbol.init ~index:1 ~name:"PSEUDO_END" + ~prec:None ~alias:(Some "⊥") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 1) + ~follow:(Ordset.singleton Uns 0) + Symbol.init ~index:2 ~name:"STAR" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "*") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 2) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:3 ~name:"SLASH" + ~prec:(Some (Array.get 0 precs)) ~alias:(Some "/") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 3) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:4 ~name:"PLUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "+") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 4) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:5 ~name:"MINUS" + ~prec:(Some (Array.get 1 precs)) ~alias:(Some "-") ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 5) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:6 ~name:"INT" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:7 ~name:"EOI" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty Prod) ~first:(Ordset.singleton Uns 7) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:8 ~name:"MulOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 0 prods + Array.get 1 prods + ]) ~first:(Ordset.of_list Uns [2; 3]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:9 ~name:"AddOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 2 prods + Array.get 3 prods + ]) ~first:(Ordset.of_list Uns [4; 5]) + ~follow:(Ordset.singleton Uns 6) + Symbol.init ~index:10 ~name:"Expr" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list Prod [ + Array.get 4 prods + Array.get 5 prods + Array.get 6 prods + ]) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.of_list Uns [2; 3; 4; 5; 7]) + Symbol.init ~index:11 ~name:"Answer" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 7 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 1) + Symbol.init ~index:12 ~name:"Answer'" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton Prod (Array.get 8 prods)) ~first:(Ordset.singleton Uns 6) + ~follow:(Ordset.singleton Uns 0) + |] + + Lr0Item = { + T = { + type t: t = { + prod: Prod.t + dot: uns + } + + hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + pp {prod; dot} formatter = + formatter |> Fmt.fmt "{%f(^Prod.pp^)=(^prod^); %u=(^dot^)}" + } + include T + include Identifiable.Make(T) + + init ~prod ~dot = + {prod; dot} + } + + Lr1Item = { + T = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{%f(^Lr0Item.pp^)=(^lr0item^); %f(^Ordset.pp^)=(^follow^)}" + } + include T + include Identifiable.Make(T) + + init ~lr0item ~follow = + {lr0item; follow} + } + + Lr1Itemset = { + T = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + hash_fold = Ordmap.hash_fold Lr1Item.hash_fold + cmp = Ordmap.cmp Lr1Item.cmp + pp = Ordmap.pp Lr1Item.pp + } + include T + include Identifiable.Make(T) + + empty = Ordmap.empty Lr0Item + + init = Ordmap.of_alist Lr0Item + } + + Lr1ItemsetClosure = { + T = { + type t: t = { + index: uns + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + hash_fold {index; _} state = + state |> Uns.hash_fold index + + cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt + "{%u=(^index + ^); %f(^Lr1Itemset.pp^)=(^kernel + ^); %f(^Lr1Itemset.pp^)=(^added + ^)}" + } + include T + include Identifiable.Make(T) + + init ~index ~kernel ~added = + {index; kernel; added} + } + + Action = { + T = { + type t: t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + + arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + to_string = function + | ShiftPrefix state_index -> "ShiftPrefix %u(^state_index^)" + | ShiftAccept state_index -> "ShiftAccept %u(^state_index^)" + | Reduce prod_index -> "Reduce %u(^prod_index^)" + + pp t formatter = + formatter |> Fmt.fmt (to_string t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt + "{%f(^Lr1ItemsetClosure.pp^)=(^lr1ItemsetClosure + ^); %f(^Map.pp Action.pp^)=(^actions + ^); %f(^Map.pp Uns.pp^)=(^gotos + ^)}" + } + include T + include Identifiable.Make(T) + + init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + } + + states = [| + (* 0 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:0 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 2 + 11, 3 + ] + (* 1 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:1 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 6 + 3, Action.Reduce 6 + 4, Action.Reduce 6 + 5, Action.Reduce 6 + 7, Action.Reduce 6 + ] + ~gotos: + Map.empty Uns + (* 2 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:2 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.ShiftPrefix 6 + 5, Action.ShiftPrefix 7 + 7, Action.ShiftAccept 8 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 3 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:3 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.ShiftPrefix 11 + ] + ~gotos: + Map.empty Uns + (* 4 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:4 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 0 + ] + ~gotos: + Map.empty Uns + (* 5 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:5 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 1 + ] + ~gotos: + Map.empty Uns + (* 6 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:6 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 2 + ] + ~gotos: + Map.empty Uns + (* 7 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:7 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 6, Action.Reduce 3 + ] + ~gotos: + Map.empty Uns + (* 8 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:8 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [1] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 1, Action.Reduce 7 + ] + ~gotos: + Map.empty Uns + (* 9 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:9 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 12 + ] + (* 10 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:10 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 6, Action.ShiftPrefix 1 + ] + ~gotos: + Map.of_alist Uns [ + 10, 13 + ] + (* 11 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:11 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8 prods) ~dot:2 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [0] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.empty + ~actions: + Map.of_alist Uns [ + 0, Action.Reduce 8 + ] + ~gotos: + Map.empty Uns + (* 12 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:12 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.Reduce 4 + 3, Action.Reduce 4 + 4, Action.Reduce 4 + 5, Action.Reduce 4 + 7, Action.Reduce 4 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + (* 13 *) State.init + ~lr1ItemsetClosure: + Lr1ItemsetClosure.init + ~index:13 + ~kernel: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:1 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5 prods) ~dot:3 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [2; 3; 4; 5; 7] + lr0item, lr1item + ) + ] + ~added: + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3 prods) ~dot:0 + let lr1item = Lr1Item.init ~lr0item ~follow: + Ordset.of_list Uns [6] + lr0item, lr1item + ) + ] + ~actions: + Map.of_alist Uns [ + 2, Action.ShiftPrefix 4 + 3, Action.ShiftPrefix 5 + 4, Action.Reduce 5 + 5, Action.Reduce 5 + 7, Action.Reduce 5 + ] + ~gotos: + Map.of_alist Uns [ + 8, 9 + 9, 10 + ] + |] + } + + Token = { + T = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + index = function + | EPSILON -> 0 + | PSEUDO_END -> 1 + | STAR -> 2 + | SLASH -> 3 + | PLUS -> 4 + | MINUS -> 5 + | INT _ -> 6 + | EOI -> 7 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Nonterm = { + T = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + index = function + | MulOp _ -> 8 + | AddOp _ -> 9 + | Expr _ -> 10 + | Answer _ -> 11 + | Answer' _ -> 12 + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec t = + Array.get (index t) Spec.symbols + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + Symbol = { + T = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + hash_fold t state = + state |> Uns.hash_fold (index t) + + cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + } + include T + include Identifiable.Make(T) + } + + State = { + T = { + type t: t = uns + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + spec t = + Array.get t Spec.states + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + init state_index = + state_index + } + + Stack = { + Elm = { + T = { + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter |> Fmt.fmt "{%f(^Symbol.pp^)=(^symbol^); %f(^State.pp^)=(^state^)}" + } + include T + include Identifiable.Make(T) + + init ~symbol ~state = + {symbol; state} + } + + type t: t = list Elm.t + + fmt ?(alt=false) ?(width=0) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + pp t formatter = + formatter |> fmt t + + Reduction = { + T = { + type stack: stack = t + type t: t = uns + type callback: callback = stack -> Symbol.t * stack + + hash_fold t state = + state |> Uns.hash_fold t + + cmp t0 t1 = + Uns.cmp t0 t1 + + pp t formatter = + formatter |> Uns.pp t + } + include T + include Identifiable.Make(T) + + callbacks = [| + (* 0 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":10:8+11]STAR[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 1 *) function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":11:8+11]SLASH[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 2 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":17:8+11]PLUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 3 *) function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":18:8+11]MINUS[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 4 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (MulOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":23:12+0]match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 5 *) function + | {symbol=Symbol.Nonterm (Expr e1); _} + :: {symbol=Symbol.Nonterm (AddOp op); _} + :: {symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":28:12+0]match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached ()[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 6 *) function + | {symbol=Symbol.Token (INT x); _} + :: tl -> Symbol.Nonterm (Expr ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":32:8+13]x[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 7 *) function + | _ + :: {symbol=Symbol.Nonterm (Expr e); _} + :: tl -> Symbol.Nonterm (Answer ( + # ____________________________________________________________________________ + [:"./Example_c.hmh":36:8+18]e[:] + # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + )), tl + | _ -> not_reached () + (* 8 *) fn _stack -> not_reached () + |] + + callback t = + Array.get t callbacks + + init callback_index = + callback_index + } + + shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + # goto: Symbol.t -> t -> t + goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol + let Spec.State.{gotos; _} = Array.get state Spec.states + let state' = Map.get_hlt symbol_index gotos |> State.init + shift ~symbol ~state:state' t + + reduce ~reduction t = + let callback = Reduction.callback reduction + let symbol, t' = callback t + goto symbol t' + } + + Status = { + T = { + type t: t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0 + | ShiftAccept _ -> 1 + | Reduce _ -> 2 + | Prefix -> 3 + | Accept _ -> 4 + | Reject _ -> 5 + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> fn hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + + let cmp t0 t1 = + let open Cmp + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Reduce (token0, reduction0), Reduce (token1, reduction1) -> + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + | Gt -> Gt + + pp t formatter = + formatter + |> fn formatter -> + match t with + | ShiftPrefix (token, state) -> + formatter + |> Fmt.fmt "ShiftPrefix (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | ShiftAccept (token, state) -> + formatter + |> Fmt.fmt "ShiftAccept (%f(^Token.pp^)(^token^), %f(^State.pp^)(^state + ^))" + | Reduce reduction -> + formatter + |> Fmt.fmt "Reduce (%f(^Token.pp^)(^token^), %f(^Stack.Reduction.pp + ^)(^reduction^))" + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept %f(^Nonterm.pp^)(^nonterm^)" + | Reject token -> formatter |> Fmt.fmt "Reject %f(^Token.pp^)(^token^)" + } + include T + include Identifiable.Make(T) + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi = { + stack=[{ + symbol=Token Token.EPSILON + state=State.init 0 + }] + status=Prefix + } + } + } + + feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> + let token_index = Token.index token + let Spec.State.{actions; _} = Array.get state Spec.states + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + Status.Reduce (token, reduction) + | None -> Status.Reject token + {t with status} + | _ -> not_reached () + + step {stack; status} = + let open Status + match status with + | ShiftPrefix (token, state) -> {stack=shift token state stack; status=Prefix} + | ShiftAccept (token, state) -> + # Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. + let stack = shift token state stack + let pseudo_end_index = Token.index Token.PSEUDO_END + let Spec.State.{actions; _} = Array.get state Spec.states + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods + let reduction = Stack.Reduction.init callback_index + let stack = Stack.reduce ~reduction stack + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + | _ -> not_reached () + | Reduce (token, reduction) -> + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + | _ -> not_reached () + + # walk: t -> t + rec walk ({status; _} as t) = + let open Status + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + }[:"./Example_c.hmh":36:4+23] + } + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with + | Prefix -> false + | Accept _ + | Error _ -> true + | _ -> not_reached () + parser', done + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example_c.expected.hmi b/bootstrap/test/hocc/Example_c.expected.hmi new file mode 100644 index 000000000..d9eacb881 --- /dev/null +++ b/bootstrap/test/hocc/Example_c.expected.hmi @@ -0,0 +1,259 @@ +# This file was generated by `hocc` based on "Example_c.hmhi" +[:"./Example_c.hmhi":1]open import Basis + +Parser = { + # Better written as `Parser = hocc`, but this is an indentation test. + include [:]{ + Spec = { + Algorithm = { + type t: t = + | Lr1 [@doc "LR(1) algorithm."] + | Ielr1 [@doc "IELR(1) algorithm."] + | Pgm1 [@doc "PGM(1) algorithm."] + | Lalr1 [@doc "LALR(1) algorithm."] + + include IdentifiableIntf.S with type t := t + } + + algorithm: Algorithm.t + [@@doc "Algorithm used to generate parser."] + + Assoc = { + type t: t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + } + + Prec = { + type t: t = { + index: uns # Index in `precs` array. + name: string + assoc: option Assoc.t + doms: Ordset.t uns Uns.cmper_witness # Indices in `precs` array of dominator + # precedences. + } + + include IdentifiableIntf.S with type t := t + } + + precs: array Prec.t + [@@doc "Array of precedences, where each element's `index` field corresponds to the + element's array index."] + + Prod = { + type t: t = { + index: uns # Index in `prods` array. + lhs_index: uns + rhs_indexes: array uns + prec: option Prec.t + callback: uns # Index of reduction callback in `Stack.Reduction.callbacks`. + } + + include IdentifiableIntf.S with type t := t + } + + prods: array Prod.t + [@@doc "Array of productions, where each element's `index` field corresponds to the + element's array index."] + + Symbol = { + type t: t = { + index: uns # Index in `symbols` array. + name: string + prec: option Prec.t + alias: option string + start: bool + prods: Ordset.t Prod.t Prod.cmper_witness # empty ≡ token + first: Ordset.t uns Uns.cmper_witness + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + symbols: array Symbol.t + [@@doc "Array of symbols, where each element's `index` field corresponds to the element's + array index."] + + Lr0Item = { + type t: t = { + prod: Prod.t + dot: uns + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Item = { + type t: t = { + lr0item: Lr0Item.t + follow: Ordset.t uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + Lr1Itemset = { + type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness + + include IdentifiableIntf.S with type t := t + } + + Lr1ItemsetClosure = { + type t: t = { + index: uns # Index of corresponding `State.t` in `states` array. + kernel: Lr1Itemset.t + added: Lr1Itemset.t + } + + include IdentifiableIntf.S with type t := t + } + + Action = { + type t: t = + | ShiftPrefix of uns # `states` index. + | ShiftAccept of uns # `states` index. + | Reduce of uns # `prods` index. + + include IdentifiableIntf.S with type t := t + } + + State = { + type t: t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t + actions: Map.t uns Action.t Uns.cmper_witness + gotos: Map.t uns uns Uns.cmper_witness + } + + include IdentifiableIntf.S with type t := t + } + + states: array State.t + [@@doc "Array of CFSM states, where each element's `lr1ItemsetClosure.index` field + corresponds to the element's array index."] + } + + Token = { + type t: t = + | EPSILON # "ε" + | PSEUDO_END # "⊥" + | STAR # "*" + | SLASH # "/" + | PLUS # "+" + | MINUS # "-" + | INT of Zint.t + | EOI + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Nonterm = { + type t: t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + Symbol = { + type t: t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.Symbol.t + } + + State = { + type t: t = uns + + include IdentifiableIntf.S with type t := t + + spec: t -> Spec.State.t + } + + Stack = { + module Elm : sig + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t: t = Elm.t list + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + fmt >e: ?alt:bool -> ?width:uns -> t -> Fmt.Formatter e >e-> Fmt.Formatter e + + Reduction = { + type stack: stack = t + type t: t + type callback: callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + callbacks: array callback + [@@doc "Array of reduction callback functions containing embedded parser code."] + + callback: t -> callback + } + + shift: symbol:Symbol.t -> state:State.t -> t -> t + [@@doc "Perform a shift."] + + reduce: reduction:Reduction.t -> t -> t + [@@doc "Perform a reduction."] + } + + Status = { + type t: t = + # `feed`/`step` may produce these variants; `next` fast-forwards over them. + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + # Common variants. + | Prefix # Valid parse prefix; more input needed. + | Accept of Nonterm.t # Successful parse result. + | Reject of Token.t # Syntax error due to unexpected token. + + include IdentifiableIntf.S with type t := t + } + + type t: t = { + stack: Stack.t + status: Status.t + } + + Start = { + Answer = { + boi: t + } + } + + feed: Token.t -> t -> t + [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, + `Reduce`, `Reject`}. `t.status` must be `Prefix`."] + + step: t -> t + [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must + be in {`ShiftPrefix`, `ShiftAccept`, `Reduce`}."] + + next: Token.t -> t -> t + [@@doc "`next token t` calls `feed token t` and fast-forwards via `step` calls to return a + result with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`."] + }[:"./Example_c.hmhi":5:4+12] + } + +calculate: string -> zint diff --git a/bootstrap/test/hocc/Example_c.hmh b/bootstrap/test/hocc/Example_c.hmh new file mode 100644 index 000000000..ccd213f19 --- /dev/null +++ b/bootstrap/test/hocc/Example_c.hmh @@ -0,0 +1,68 @@ +open import Basis + +Parser = { + # Better written as `Parser = hocc`, but this is an indentation test. + include hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr prec mul -> + match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e + } + +# Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. +tokenize s = + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) + |> List.rev_map ~f:fn s -> + let open Token + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + |> List.push Token.EOI + |> List.rev + +# Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". +calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with + | Prefix -> false + | Accept _ + | Error _ -> true + | _ -> not_reached () + parser', done + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () diff --git a/bootstrap/test/hocc/Example_c.hmhi b/bootstrap/test/hocc/Example_c.hmhi new file mode 100644 index 000000000..e56024c8e --- /dev/null +++ b/bootstrap/test/hocc/Example_c.hmhi @@ -0,0 +1,8 @@ +open import Basis + +Parser = { + # Better written as `Parser = hocc`, but this is an indentation test. + include hocc + } + +calculate: string -> zint diff --git a/bootstrap/test/hocc/Example_ml.expected b/bootstrap/test/hocc/Example_ml.expected new file mode 100644 index 000000000..716dab963 --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.expected @@ -0,0 +1 @@ +14z \ No newline at end of file diff --git a/bootstrap/test/hocc/Example_ml.expected.ml b/bootstrap/test/hocc/Example_ml.expected.ml new file mode 100644 index 000000000..68a87d10b --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.expected.ml @@ -0,0 +1,1586 @@ +(* This file was generated by `hocc` based on "Example_ml.hmh" *) +open Basis +open! Basis.Rudiments + +(* Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. *) +include struct + module Spec = struct + module Algorithm = struct + module T = struct + type t = + | Lr1 + | Ielr1 + | Pgm1 + | Lalr1 + + let index = function + | Lr1 -> 0L + | Ielr1 -> 1L + | Pgm1 -> 2L + | Lalr1 -> 3L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let to_string = function + | Lr1 -> "Lr1" + | Ielr1 -> "Ielr1" + | Pgm1 -> "Pgm1" + | Lalr1 -> "Lalr1" + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + let algorithm = Algorithm.Lr1 + + module Assoc = struct + module T = struct + type t = + | Left + | Right + + let index = function + | Left -> 0L + | Right -> 1L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let to_string = function + | Left -> "Left" + | Right -> "Right" + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + module Prec = struct + module T = struct + type t = { + index: uns; + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; + } + + let index {index; _} = + index + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let pp {index; name; assoc; doms} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; assoc=" |> Option.pp Assoc.pp assoc + |> Fmt.fmt "; doms=" |> Ordset.pp doms + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~name ~assoc ~doms = + {index; name; assoc; doms} + end + + let precs = [| + Prec.init ~index:0L ~name:"mul" ~assoc:(Some Left) ~doms:(Ordset.empty (module Uns)); + Prec.init ~index:1L ~name:"add" ~assoc:(Some Left) ~doms:(Ordset.singleton (module Uns) 0L) + |] + + module Prod = struct + module T = struct + type t = { + index: uns; + lhs_index: uns; + rhs_indexes: uns array; + prec: Prec.t option; + callback: uns; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; lhs_index; rhs_indexes; prec; callback} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; lhs_index=" |> Uns.pp lhs_index + |> Fmt.fmt "; rhs_indexes=" |> Array.pp Uns.pp rhs_indexes + |> Fmt.fmt "; prec=" |> Option.pp Prec.pp prec + |> Fmt.fmt "; callback=" |> Uns.pp callback + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~lhs_index ~rhs_indexes ~prec ~callback = + {index; lhs_index; rhs_indexes; prec; callback} + end + + let prods = [| + Prod.init ~index:0L ~lhs_index:8L ~rhs_indexes:[|2L|] + ~prec:None ~callback:0L; + Prod.init ~index:1L ~lhs_index:8L ~rhs_indexes:[|3L|] + ~prec:None ~callback:1L; + Prod.init ~index:2L ~lhs_index:9L ~rhs_indexes:[|4L|] + ~prec:None ~callback:2L; + Prod.init ~index:3L ~lhs_index:9L ~rhs_indexes:[|5L|] + ~prec:None ~callback:3L; + Prod.init ~index:4L ~lhs_index:10L ~rhs_indexes:[|10L; 8L; 10L|] + ~prec:(Some (Array.get 0L precs)) ~callback:4L; + Prod.init ~index:5L ~lhs_index:10L ~rhs_indexes:[|10L; 9L; 10L|] + ~prec:(Some (Array.get 1L precs)) ~callback:5L; + Prod.init ~index:6L ~lhs_index:10L ~rhs_indexes:[|6L|] + ~prec:None ~callback:6L; + Prod.init ~index:7L ~lhs_index:11L ~rhs_indexes:[|10L; 7L|] + ~prec:None ~callback:7L; + Prod.init ~index:8L ~lhs_index:12L ~rhs_indexes:[|11L; 1L|] + ~prec:None ~callback:8L + |] + + module Symbol = struct + module T = struct + type t = { + index: uns; + name: string; + prec: Prec.t option; + alias: string option; + start: bool; + prods: (Prod.t, Prod.cmper_witness) Ordset.t; + first: (uns, Uns.cmper_witness) Ordset.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + let hash_fold {index; _} state = + Uns.hash_fold index state + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; name; prec; alias; start; prods; first; follow} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; name=" |> String.pp name + |> Fmt.fmt "; prec=" |> Option.pp Prec.pp prec + |> Fmt.fmt "; alias=" |> Option.pp String.pp alias + |> Fmt.fmt "; start=" |> Bool.pp start + |> Fmt.fmt "; prods=" |> Ordset.pp prods + |> Fmt.fmt "; first=" |> Ordset.pp first + |> Fmt.fmt "; follow=" |> Ordset.pp follow + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~name ~prec ~alias ~start ~prods ~first ~follow = + {index; name; prec; alias; start; prods; first; follow} + end + + let symbols = [| + Symbol.init ~index:0L ~name:"EPSILON" + ~prec:None ~alias:(Some "ε") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 0L) + ~follow:(Ordset.empty (module Uns)); + Symbol.init ~index:1L ~name:"PSEUDO_END" + ~prec:None ~alias:(Some "⊥") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 1L) + ~follow:(Ordset.singleton (module Uns) 0L); + Symbol.init ~index:2L ~name:"STAR" + ~prec:(Some (Array.get 0L precs)) ~alias:(Some "*") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 2L) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:3L ~name:"SLASH" + ~prec:(Some (Array.get 0L precs)) ~alias:(Some "/") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 3L) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:4L ~name:"PLUS" + ~prec:(Some (Array.get 1L precs)) ~alias:(Some "+") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 4L) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:5L ~name:"MINUS" + ~prec:(Some (Array.get 1L precs)) ~alias:(Some "-") ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 5L) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:6L ~name:"INT" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 6L) + ~follow:(Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L]); + Symbol.init ~index:7L ~name:"EOI" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.empty (module Prod)) ~first:(Ordset.singleton (module Uns) 7L) + ~follow:(Ordset.singleton (module Uns) 1L); + Symbol.init ~index:8L ~name:"MulOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list (module Prod) [ + Array.get 0L prods; + Array.get 1L prods; + ]) ~first:(Ordset.of_list (module Uns) [2L; 3L]) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:9L ~name:"AddOp" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list (module Prod) [ + Array.get 2L prods; + Array.get 3L prods; + ]) ~first:(Ordset.of_list (module Uns) [4L; 5L]) + ~follow:(Ordset.singleton (module Uns) 6L); + Symbol.init ~index:10L ~name:"Expr" + ~prec:None ~alias:None ~start:false + ~prods:(Ordset.of_list (module Prod) [ + Array.get 4L prods; + Array.get 5L prods; + Array.get 6L prods; + ]) ~first:(Ordset.singleton (module Uns) 6L) + ~follow:(Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L]); + Symbol.init ~index:11L ~name:"Answer" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton (module Prod) (Array.get 7L prods)) ~first:(Ordset.singleton (module Uns) 6L) + ~follow:(Ordset.singleton (module Uns) 1L); + Symbol.init ~index:12L ~name:"Answer'" + ~prec:None ~alias:None ~start:true + ~prods:(Ordset.singleton (module Prod) (Array.get 8L prods)) ~first:(Ordset.singleton (module Uns) 6L) + ~follow:(Ordset.singleton (module Uns) 0L) + |] + + module Lr0Item = struct + module T = struct + type t = { + prod: Prod.t; + dot: uns; + } + + let hash_fold {prod; dot} state = + state + |> Prod.hash_fold prod + |> Uns.hash_fold dot + + let cmp {prod=p0; dot=d0} {prod=p1; dot=d1} = + let open Cmp in + match Prod.cmp p0 p1 with + | Lt -> Lt + | Eq -> Uns.cmp d0 d1 + | Gt -> Gt + + let pp {prod; dot} formatter = + formatter + |> Fmt.fmt "{prod=" |> Prod.pp prod + |> Fmt.fmt "; dot=" |> Uns.pp dot + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~prod ~dot = + {prod; dot} + end + + module Lr1Item = struct + module T = struct + type t = { + lr0item: Lr0Item.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + let hash_fold {lr0item; follow} state = + state + |> Lr0Item.hash_fold lr0item + |> Ordset.hash_fold follow + + let cmp {lr0item=l0; follow=f0} {lr0item=l1; follow=f1} = + let open Cmp in + match Lr0Item.cmp l0 l1 with + | Lt -> Lt + | Eq -> Ordset.cmp f0 f1 + | Gt -> Gt + + let pp {lr0item; follow} formatter = + formatter + |> Fmt.fmt "{lr0item=" |> Lr0Item.pp lr0item + |> Fmt.fmt "; follow=" |> Ordset.pp follow + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~lr0item ~follow = + {lr0item; follow} + end + + module Lr1Itemset = struct + module T = struct + type t = (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.t + + let hash_fold t = + Ordmap.hash_fold Lr1Item.hash_fold t + + let cmp t0 t1 = + Ordmap.cmp Lr1Item.cmp t0 t1 + + let pp = Ordmap.pp Lr1Item.pp + end + include T + include Identifiable.Make(T) + + let empty = Ordmap.empty (module Lr0Item) + + let init = Ordmap.of_alist (module Lr0Item) + end + + module Lr1ItemsetClosure = struct + module T = struct + type t = { + index: uns; + kernel: Lr1Itemset.t; + added: Lr1Itemset.t; + } + + let hash_fold {index; _} state = + state |> Uns.hash_fold index + + let cmp {index=i0; _} {index=i1; _} = + Uns.cmp i0 i1 + + let pp {index; kernel; added} formatter = + formatter + |> Fmt.fmt "{index=" |> Uns.pp index + |> Fmt.fmt "; kernel=" |> Lr1Itemset.pp kernel + |> Fmt.fmt "; added=" |> Lr1Itemset.pp added + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~index ~kernel ~added = + {index; kernel; added} + end + + module Action = struct + module T = struct + type t = + | ShiftPrefix of uns + | ShiftAccept of uns + | Reduce of uns + + let constructor_index = function + | ShiftPrefix _ -> 0L + | ShiftAccept _ -> 1L + | Reduce _ -> 2L + + let arg_index = function + | ShiftPrefix arg_index + | ShiftAccept arg_index + | Reduce arg_index -> arg_index + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> Uns.hash_fold (arg_index t) + + let cmp t0 t1 = + let open Cmp in + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> Uns.cmp (arg_index t0) (arg_index t1) + | Gt -> Gt + + let to_string = function + | ShiftPrefix state_index -> begin + String.Fmt.empty + |> Fmt.fmt "ShiftPrefix " |> Uns.pp state_index + |> Fmt.to_string + end + | ShiftAccept state_index -> begin + String.Fmt.empty + |> Fmt.fmt "ShiftAccept " |> Uns.pp state_index + |> Fmt.to_string + end + | Reduce prod_index -> begin + String.Fmt.empty + |> Fmt.fmt "Reduce " |> Uns.pp prod_index + |> Fmt.to_string + end + + let pp t formatter = + formatter |> Fmt.fmt (to_string t) + end + include T + include Identifiable.Make(T) + end + + module State = struct + module T = struct + type t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t; + actions: (uns, Action.t, Uns.cmper_witness) Map.t; + gotos: (uns, uns, Uns.cmper_witness) Map.t; + } + + let hash_fold {lr1ItemsetClosure; _} state = + state |> Lr1ItemsetClosure.hash_fold lr1ItemsetClosure + + let cmp {lr1ItemsetClosure=c0; _} {lr1ItemsetClosure=c1; _} = + Lr1ItemsetClosure.cmp c0 c1 + + let pp {lr1ItemsetClosure; actions; gotos} formatter = + formatter + |> Fmt.fmt "{lr1ItemsetClosure=" |> Lr1ItemsetClosure.pp lr1ItemsetClosure + |> Fmt.fmt "; actions=" |> Map.pp Action.pp actions + |> Fmt.fmt "; gotos=" |> Map.pp Uns.pp gotos + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~lr1ItemsetClosure ~actions ~gotos = + {lr1ItemsetClosure; actions; gotos} + end + + let states = [| + (* 0 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:0L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [0L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [1L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.ShiftPrefix 1L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (10L, 2L); + (11L, 3L); + ] + ); + (* 1 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:1L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (2L, Action.Reduce 6L); + (3L, Action.Reduce 6L); + (4L, Action.Reduce 6L); + (5L, Action.Reduce 6L); + (7L, Action.Reduce 6L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 2 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:2L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [1L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (2L, Action.ShiftPrefix 4L); + (3L, Action.ShiftPrefix 5L); + (4L, Action.ShiftPrefix 6L); + (5L, Action.ShiftPrefix 7L); + (7L, Action.ShiftAccept 8L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (8L, 9L); + (9L, 10L); + ] + ); + (* 3 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:3L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [0L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (1L, Action.ShiftPrefix 11L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 4 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:4L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.Reduce 0L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 5 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:5L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.Reduce 1L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 6 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:6L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.Reduce 2L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 7 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:7L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.Reduce 3L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 8 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:8L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 7L prods) ~dot:2L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [1L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (1L, Action.Reduce 7L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 9 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:9L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:2L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.ShiftPrefix 1L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (10L, 12L); + ] + ); + (* 10 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:10L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:2L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 6L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (6L, Action.ShiftPrefix 1L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (10L, 13L); + ] + ); + (* 11 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:11L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 8L prods) ~dot:2L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [0L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.empty + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (0L, Action.Reduce 8L); + ] + ) + ~gotos:( + Map.empty (module Uns) + ); + (* 12 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:12L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:3L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (2L, Action.Reduce 4L); + (3L, Action.Reduce 4L); + (4L, Action.Reduce 4L); + (5L, Action.Reduce 4L); + (7L, Action.Reduce 4L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (8L, 9L); + (9L, 10L); + ] + ); + (* 13 *) State.init + ~lr1ItemsetClosure:( + Lr1ItemsetClosure.init + ~index:13L + ~kernel:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 4L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:1L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 5L prods) ~dot:3L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [2L; 3L; 4L; 5L; 7L] + ) in + lr0item, lr1item + ); + ] + ) + ~added:( + Lr1Itemset.init [ + ( + let lr0item = Lr0Item.init ~prod:(Array.get 0L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 1L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 2L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ( + let lr0item = Lr0Item.init ~prod:(Array.get 3L prods) ~dot:0L in + let lr1item = Lr1Item.init ~lr0item ~follow:( + Ordset.of_list (module Uns) [6L] + ) in + lr0item, lr1item + ); + ] + ) + ) + ~actions:( + Map.of_alist (module Uns) [ + (2L, Action.ShiftPrefix 4L); + (3L, Action.ShiftPrefix 5L); + (4L, Action.Reduce 5L); + (5L, Action.Reduce 5L); + (7L, Action.Reduce 5L); + ] + ) + ~gotos:( + Map.of_alist (module Uns) [ + (8L, 9L); + (9L, 10L); + ] + ); + |] + end + + module Token = struct + module T = struct + type t = + | EPSILON (* "ε" *) + | PSEUDO_END (* "⊥" *) + | STAR (* "*" *) + | SLASH (* "/" *) + | PLUS (* "+" *) + | MINUS (* "-" *) + | INT of Zint.t + | EOI + + let index = function + | EPSILON -> 0L + | PSEUDO_END -> 1L + | STAR -> 2L + | SLASH -> 3L + | PLUS -> 4L + | MINUS -> 5L + | INT _ -> 6L + | EOI -> 7L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec t = + Array.get (index t) Spec.symbols + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module Nonterm = struct + module T = struct + type t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + let index = function + | MulOp _ -> 8L + | AddOp _ -> 9L + | Expr _ -> 10L + | Answer _ -> 11L + | Answer' _ -> 12L + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec t = + Array.get (index t) Spec.symbols + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module Symbol = struct + module T = struct + type t = + | Token of Token.t + | Nonterm of Nonterm.t + + let index = function + | Token token -> Token.index token + | Nonterm nonterm -> Nonterm.index nonterm + + let hash_fold t state = + state |> Uns.hash_fold (index t) + + let cmp t0 t1 = + Uns.cmp (index t0) (index t1) + + let spec = function + | Token token -> Token.spec token + | Nonterm nonterm -> Nonterm.spec nonterm + + let pp t formatter = + formatter + |> Spec.Symbol.pp (spec t) + end + include T + include Identifiable.Make(T) + end + + module State = struct + module T = struct + type t = uns + + let hash_fold t state = + state |> Uns.hash_fold t + + let cmp t0 t1 = + Uns.cmp t0 t1 + + let spec t = + Array.get t Spec.states + + let pp t formatter = + formatter |> Uns.pp t + end + include T + include Identifiable.Make(T) + + let init state_index = + state_index + end + + module Stack = struct + module Elm = struct + module T = struct + type t = { + symbol: Symbol.t; + state: State.t; + } + + let hash_fold {symbol; state} hash_state = + hash_state + |> Symbol.hash_fold symbol + |> State.hash_fold state + + let cmp {symbol=symbol0; state=state0} {symbol=symbol1; state=state1} = + let open Cmp in + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Symbol.cmp symbol0 symbol1 + | Gt -> Gt + + let pp {symbol; state} formatter = + formatter + |> Fmt.fmt "{symbol=" |> Symbol.pp symbol + |> Fmt.fmt "; state=" |> State.pp state + |> Fmt.fmt "}" + end + include T + include Identifiable.Make(T) + + let init ~symbol ~state = + {symbol; state} + end + + type t = Elm.t list + + let fmt ?(alt=false) ?(width=0L) t formatter = + formatter |> List.fmt ~alt ~width Elm.pp t + + let pp t formatter = + formatter |> fmt t + + module Reduction = struct + module T = struct + type stack = t + type t = uns + type callback = stack -> Symbol.t * stack + + let hash_fold t state = + state |> Uns.hash_fold t + + let cmp t0 t1 = + Uns.cmp t0 t1 + + let pp t formatter = + formatter |> Uns.pp t + end + include T + include Identifiable.Make(T) + + let callbacks = [| + (* 0 *) (function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + (*______________________________________________________________________________*) +#10 "./Example_ml.hmh" +STAR + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 1 *) (function + | _ + :: tl -> Symbol.Nonterm (MulOp ( + (*______________________________________________________________________________*) +#11 "./Example_ml.hmh" +SLASH + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 2 *) (function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + (*______________________________________________________________________________*) +#17 "./Example_ml.hmh" +PLUS + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 3 *) (function + | _ + :: tl -> Symbol.Nonterm (AddOp ( + (*______________________________________________________________________________*) +#18 "./Example_ml.hmh" +MINUS + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 4 *) (function + | Elm.{symbol=Symbol.Nonterm (Expr e1); _} + :: Elm.{symbol=Symbol.Nonterm (MulOp op); _} + :: Elm.{symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + (*______________________________________________________________________________*) +#23 "./Example_ml.hmh" +match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 5 *) (function + | Elm.{symbol=Symbol.Nonterm (Expr e1); _} + :: Elm.{symbol=Symbol.Nonterm (AddOp op); _} + :: Elm.{symbol=Symbol.Nonterm (Expr e0); _} + :: tl -> Symbol.Nonterm (Expr ( + (*______________________________________________________________________________*) +#28 "./Example_ml.hmh" +match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 6 *) (function + | Elm.{symbol=Symbol.Token (INT x); _} + :: tl -> Symbol.Nonterm (Expr ( + (*______________________________________________________________________________*) +#32 "./Example_ml.hmh" +x + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 7 *) (function + | _ + :: Elm.{symbol=Symbol.Nonterm (Expr e); _} + :: tl -> Symbol.Nonterm (Answer ( + (*______________________________________________________________________________*) +#36 "./Example_ml.hmh" +e + (*‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾*) + )), tl + | _ -> not_reached () + ); + (* 8 *) (fun _stack -> not_reached ()) + |] + + let callback t = + Array.get t callbacks + + let init callback_index = + callback_index + end + + let shift ~symbol ~state t = + (Elm.init ~symbol ~state) :: t + + (* val goto: Symbol.t -> t -> t *) + let goto symbol t = + match t with + | [] -> not_reached () + | Elm.{state; _} :: _ -> + let symbol_index = Symbol.index symbol in + let Spec.State.{gotos; _} = Array.get state Spec.states in + let state' = Map.get_hlt symbol_index gotos |> State.init in + shift ~symbol ~state:state' t + + let reduce ~reduction t = + let callback = Reduction.callback reduction in + let symbol, t' = callback t in + goto symbol t' + end + + module Status = struct + module T = struct + type t = + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + | Prefix + | Accept of Nonterm.t + | Reject of Token.t + + let constructor_index = function + | ShiftPrefix _ -> 0L + | ShiftAccept _ -> 1L + | Reduce _ -> 2L + | Prefix -> 3L + | Accept _ -> 4L + | Reject _ -> 5L + + let hash_fold t state = + state + |> Uns.hash_fold (constructor_index t) + |> (fun hash_state -> + match t with + | ShiftPrefix (token, state) + | ShiftAccept (token, state) -> + hash_state |> State.hash_fold state |> Token.hash_fold token + | Reduce (token, reduction) -> + hash_state |> Stack.Reduction.hash_fold reduction |> Token.hash_fold token + | Prefix -> hash_state + | Accept nonterm -> hash_state |> Nonterm.hash_fold nonterm + | Reject token -> hash_state |> Token.hash_fold token + ) + + let cmp t0 t1 = + let open Cmp in + match Uns.cmp (constructor_index t0) (constructor_index t1) with + | Lt -> Lt + | Eq -> begin + match t0, t1 with + | ShiftPrefix (token0, state0), ShiftPrefix (token1, state1) + | ShiftAccept (token0, state0), ShiftAccept (token1, state1) -> begin + match State.cmp state0 state1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + end + | Reduce (token0, reduction0), Reduce (token1, reduction1) + -> begin + match Stack.Reduction.cmp reduction0 reduction1 with + | Lt -> Lt + | Eq -> Token.cmp token0 token1 + | Gt -> Gt + end + | Prefix, Prefix -> Eq + | Accept nonterm0, Accept nonterm1 -> Nonterm.cmp nonterm0 nonterm1 + | Reject token0, Reject token1 -> Token.cmp token0 token1 + | _, _ -> not_reached () + end + | Gt -> Gt + + let pp t formatter = + formatter + |> (fun formatter -> + match t with + | ShiftPrefix (token, state) -> begin + formatter + |> Fmt.fmt "ShiftPrefix (" |> Token.pp token + |> Fmt.fmt ", " |> State.pp state + |> Fmt.fmt ")" + end + | ShiftAccept (token, state) -> begin + formatter + |> Fmt.fmt "ShiftAccept (" |> Token.pp token + |> Fmt.fmt ", " |> State.pp state + |> Fmt.fmt ")" + end + | Reduce (token, reduction) -> begin + formatter + |> Fmt.fmt "Reduce (" |> Token.pp token + |> Fmt.fmt ", " |> Stack.Reduction.pp reduction + |> Fmt.fmt ")" + end + | Prefix -> formatter |> Fmt.fmt "Prefix" + | Accept nonterm -> formatter |> Fmt.fmt "Accept " |> Nonterm.pp nonterm + | Reject token -> formatter |> Fmt.fmt "Reject " |> Token.pp token + ) + end + include T + include Identifiable.Make(T) + end + + type t = { + stack: Stack.t; + status: Status.t; + } + + module Start = struct + module Answer = struct + let boi = { + stack=[{ + symbol=Token Token.EPSILON; + state=State.init 0L; + }]; + status=Prefix; + } + end + end + + let feed token = function + | {stack={state; _} :: _; status=Prefix} as t -> begin + let token_index = Token.index token in + let Spec.State.{actions; _} = Array.get state Spec.states in + let status = match Map.get token_index actions with + | Some (Spec.Action.ShiftPrefix state') -> Status.ShiftPrefix (token, state') + | Some (Spec.Action.ShiftAccept state') -> Status.ShiftAccept (token, state') + | Some (Spec.Action.Reduce prod_index) -> begin + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods in + let reduction = Stack.Reduction.init callback_index in + Status.Reduce (token, reduction) + end + | None -> Status.Reject token + in + {t with status} + end + | _ -> not_reached () + + let step {stack; status} = + let open Status in + match status with + | ShiftPrefix (token, state) -> + {stack=Stack.shift ~symbol:(Token token) ~state stack; status=Prefix} + | ShiftAccept (token, state) -> begin + (* Shift, perform the ⊥ reduction, and extract the accepted symbol from the stack. *) + let stack = Stack.shift ~symbol:(Token token) ~state stack in + let pseudo_end_index = Token.index Token.PSEUDO_END in + let Spec.State.{actions; _} = Array.get state Spec.states in + match Map.get_hlt pseudo_end_index actions with + | Spec.Action.Reduce prod_index -> begin + let Spec.Prod.{callback=callback_index; _} = Array.get prod_index Spec.prods in + let reduction = Stack.Reduction.init callback_index in + let stack = Stack.reduce ~reduction stack in + match stack with + | [] -> not_reached () + | {symbol=Token _; _} :: _ -> not_reached () + | {symbol=Nonterm nonterm; _} :: _ -> {stack=[]; status=Accept nonterm} + end + | _ -> not_reached () + end + | Reduce (token, reduction) -> begin + feed token {stack=Stack.reduce ~reduction stack; status=Prefix} + end + | _ -> not_reached () + + (* val walk: t -> t *) + let rec walk ({status; _} as t) = + let open Status in + match status with + | ShiftPrefix _ + | ShiftAccept _ + | Reduce _ -> t |> step |> walk + | Prefix + | Accept _ + | Reject _ -> t + + let next token ({status; _} as t) = + match status with + | Status.Prefix -> t |> feed token |> walk + | _ -> not_reached () + end +#37 "./Example_ml.hmh" + +(* Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. *) +let tokenize s = + s |> String.split_rev ~f:(fun cp -> Codepoint.(cp = (of_char ' '))) + |> List.rev_filter ~f:(fun s -> not (String.is_empty s)) + |> List.rev_map ~f:(fun s -> + let open Token in + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + ) + |> List.push Token.EOI + |> List.rev + +(* Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". *) +let calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:(fun parser tok -> + let {status; _} as parser' = next tok parser in + let is_done = match status with + | Prefix -> false + | Accept _ + | Reject _ -> true + | _ -> not_reached () + in + parser', is_done + ) in + match status with + | Accept (Answer answer) -> answer + | Prefix -> halt "Partial input" + | Reject _ -> halt "Parse error" + | _ -> not_reached () + +let main () = + File.Fmt.stdout + |> Zint.pp (calculate "2 + 3 * 4") + |> ignore + +let _ = main () diff --git a/bootstrap/test/hocc/Example_ml.expected.mli b/bootstrap/test/hocc/Example_ml.expected.mli new file mode 100644 index 000000000..0d37899d5 --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.expected.mli @@ -0,0 +1,262 @@ +(* This file was generated by `hocc` based on "Example_ml.hmhi" *) +open! Basis +open! Basis.Rudiments + +(* Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a + * module signature. *) +include sig + module Spec : sig + module Algorithm : sig + type t = + | Lr1 (** LR(1) algorithm. *) + | Ielr1 (** IELR(1) algorithm. *) + | Pgm1 (** PGM(1) algorithm. *) + | Lalr1 (** LALR(1) algorithm. *) + + include IdentifiableIntf.S with type t := t + end + + val algorithm: Algorithm.t + (** Algorithm used to generate parser. *) + + module Assoc : sig + type t = + | Left + | Right + + include IdentifiableIntf.S with type t := t + end + + module Prec : sig + type t = { + index: uns; (* Index in `precs` array. *) + name: string; + assoc: Assoc.t option; + doms: (uns, Uns.cmper_witness) Ordset.t; (* Indices in `precs` array of dominator + * precedences. *) + } + + include IdentifiableIntf.S with type t := t + end + + val precs: Prec.t array + (** Array of precedences, where each element's `index` field corresponds to the element's + array index. *) + + module Prod : sig + type t = { + index: uns; (* Index in `prods` array. *) + lhs_index: uns; + rhs_indexes: uns array; + prec: Prec.t option; + callback: uns; (* Index of reduction callback in `Stack.Reduction.callbacks`. *) + } + + include IdentifiableIntf.S with type t := t + end + + val prods: Prod.t array + (** Array of productions, where each element's `index` field corresponds to the element's + array index. *) + + module Symbol : sig + type t = { + index: uns; (* Index in `symbols` array. *) + name: string; + prec: Prec.t option; + alias: string option; + start: bool; + prods: (Prod.t, Prod.cmper_witness) Ordset.t; (* empty ≡ token *) + first: (uns, Uns.cmper_witness) Ordset.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + include IdentifiableIntf.S with type t := t + end + + val symbols: Symbol.t array + (** Array of symbols, where each element's `index` field corresponds to the element's + array index. *) + + module Lr0Item : sig + type t = { + prod: Prod.t; + dot: uns; + } + + include IdentifiableIntf.S with type t := t + end + + module Lr1Item : sig + type t = { + lr0item: Lr0Item.t; + follow: (uns, Uns.cmper_witness) Ordset.t; + } + + include IdentifiableIntf.S with type t := t + end + + module Lr1Itemset : sig + type t = (Lr0Item.t, Lr1Item.t, Lr0Item.cmper_witness) Ordmap.t + + include IdentifiableIntf.S with type t := t + end + + module Lr1ItemsetClosure : sig + type t = { + index: uns; (* Index of corresponding `State.t` in `states` array. *) + kernel: Lr1Itemset.t; + added: Lr1Itemset.t; + } + + include IdentifiableIntf.S with type t := t + end + + module Action : sig + type t = + | ShiftPrefix of uns (* `states` index. *) + | ShiftAccept of uns (* `states` index. *) + | Reduce of uns (* `prods` index. *) + + include IdentifiableIntf.S with type t := t + end + + module State : sig + type t = { + lr1ItemsetClosure: Lr1ItemsetClosure.t; + actions: (uns, Action.t, Uns.cmper_witness) Map.t; + gotos: (uns, uns, Uns.cmper_witness) Map.t; + } + + include IdentifiableIntf.S with type t := t + end + + val states: State.t array + (** Array of CFSM states, where each element's `lr1ItemsetClosure.index` field corresponds + to the element's array index. *) + end + + module Token : sig + type t = + | EPSILON (* "ε" *) + | PSEUDO_END (* "⊥" *) + | STAR (* "*" *) + | SLASH (* "/" *) + | PLUS (* "+" *) + | MINUS (* "-" *) + | INT of Zint.t + | EOI + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module Nonterm : sig + type t = + | MulOp of Token.t + | AddOp of Token.t + | Expr of Zint.t + | Answer of Zint.t + | Answer' of Zint.t + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module Symbol : sig + type t = + | Token of Token.t + | Nonterm of Nonterm.t + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.Symbol.t + end + + module State : sig + type t = uns + + include IdentifiableIntf.S with type t := t + + val spec: t -> Spec.State.t + end + + module Stack : sig + module Elm : sig + type t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t = Elm.t list + + val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + val fmt: ?alt:bool -> ?width:uns -> t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + + module Reduction : sig + type stack = t + type t + type callback = stack -> Symbol.t * stack + + include IdentifiableIntf.S with type t := t + + val callbacks: callback array + (** Array of reduction callback functions containing embedded parser code. *) + + val callback: t -> callback + end + + val shift: symbol:Symbol.t -> state:State.t -> t -> t + (** Perform a shift. *) + + val reduce: reduction:Reduction.t -> t -> t + (** Perform a reduction. *) + end + + module Status : sig + type t = + (* `feed`/`step` may produce these variants; `next` fast-forwards over them. *) + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t + (* Common variants. *) + | Prefix (** Valid parse prefix; more input needed. *) + | Accept of Nonterm.t (** Successful parse result. *) + | Reject of Token.t (** Syntax error due to unexpected token. *) + + include IdentifiableIntf.S with type t := t + end + + type t = { + stack: Stack.t; + status: Status.t; + } + + module Start : sig + module Answer : sig + val boi: t + end + end + + val feed: Token.t -> t -> t + (** `feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, `Reduce`, + `Reject`}. `t.status` must be `Prefix`. *) + + val step: t -> t + (** `step t` returns the result of applying one state transition to `t`. `t.status` must be in + {`ShiftPrefix`, `ShiftAccept`, `Reduce`}. *) + + val next: Token.t -> t -> t + (** `next token t` calls `feed token t` and fast-forwards via `step` calls to return a result + with status in {`Prefix`, `Accept`, `Reject`}. `t.status` must be `Prefix`. *) + end +#7 "./Example_ml.hmhi" + +val calculate: string -> Zint.t + (** Calculate the result of a simple arithmetic expression comprising non-negative integers and + `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces. *) diff --git a/bootstrap/test/hocc/Example_ml.expected.txt b/bootstrap/test/hocc/Example_ml.expected.txt new file mode 100644 index 000000000..934e24826 --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.expected.txt @@ -0,0 +1,197 @@ +Example_ml grammar + +Precedences + left mul + left add < mul +Tokens + token EPSILON "ε" + First: {"ε"} + Follow: {} + token PSEUDO_END "⊥" + First: {"⊥"} + Follow: {"ε"} + token STAR "*" prec mul + First: {"*"} + Follow: {INT} + token SLASH "/" prec mul + First: {"/"} + Follow: {INT} + token PLUS "+" prec add + First: {"+"} + Follow: {INT} + token MINUS "-" prec add + First: {"-"} + Follow: {INT} + token INT of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + token EOI + First: {EOI} + Follow: {"⊥"} +Non-terminals + nonterm MulOp of Token.t + First: {"*", "/"} + Follow: {INT} + Productions + MulOp ::= "*" + MulOp ::= "/" + nonterm AddOp of Token.t + First: {"+", "-"} + Follow: {INT} + Productions + AddOp ::= "+" + AddOp ::= "-" + nonterm Expr of Zint.t + First: {INT} + Follow: {"*", "/", "+", "-", EOI} + Productions + Expr ::= Expr MulOp Expr prec mul + Expr ::= Expr AddOp Expr prec add + Expr ::= INT + start Answer of Zint.t + First: {INT} + Follow: {"⊥"} + Productions + Answer ::= Expr EOI + start Answer' of Zint.t + First: {INT} + Follow: {"ε"} + Productions + Answer' ::= Answer "⊥" +LR(1) States + State 0 [0.0] + Kernel + [Answer' ::= · Answer "⊥", {"ε"}] + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + [Answer ::= · Expr EOI, {"⊥"}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 2 + Answer : 3 + State 1 [1.0] + Kernel + [Expr ::= INT ·, {"*", "/", "+", "-", EOI}] + Actions + "*" : Reduce Expr ::= INT + "/" : Reduce Expr ::= INT + "+" : Reduce Expr ::= INT + "-" : Reduce Expr ::= INT + EOI : Reduce Expr ::= INT + State 2 [2.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Answer ::= Expr · EOI, {"⊥"}] + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : ShiftPrefix 6 prec add + "-" : ShiftPrefix 7 prec add + EOI : ShiftAccept 8 + Gotos + MulOp : 9 + AddOp : 10 + State 3 [3.0] + Kernel + [Answer' ::= Answer · "⊥", {"ε"}] + Actions + "⊥" : ShiftPrefix 11 + State 4 [4.0] + Kernel + [MulOp ::= "*" ·, {INT}] + Actions + INT : Reduce MulOp ::= "*" + State 5 [5.0] + Kernel + [MulOp ::= "/" ·, {INT}] + Actions + INT : Reduce MulOp ::= "/" + State 6 [6.0] + Kernel + [AddOp ::= "+" ·, {INT}] + Actions + INT : Reduce AddOp ::= "+" + State 7 [7.0] + Kernel + [AddOp ::= "-" ·, {INT}] + Actions + INT : Reduce AddOp ::= "-" + State 8 [8.0] + Kernel + [Answer ::= Expr EOI ·, {"⊥"}] + Actions + "⊥" : Reduce Answer ::= Expr EOI + State 9 [9.0] + Kernel + [Expr ::= Expr MulOp · Expr, {"*", "/", "+", "-", EOI}] prec mul + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 12 + State 10 [10.0] + Kernel + [Expr ::= Expr AddOp · Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [Expr ::= · Expr MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= · Expr AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= · INT, {"*", "/", "+", "-", EOI}] + Actions + INT : ShiftPrefix 1 + Gotos + Expr : 13 + State 11 [11.0] + Kernel + [Answer' ::= Answer "⊥" ·, {"ε"}] + Actions + "ε" : Reduce Answer' ::= Answer "⊥" + State 12 [12.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr MulOp Expr ·, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : Reduce Expr ::= Expr MulOp Expr prec mul + "/" : Reduce Expr ::= Expr MulOp Expr prec mul + "+" : Reduce Expr ::= Expr MulOp Expr prec mul + "-" : Reduce Expr ::= Expr MulOp Expr prec mul + EOI : Reduce Expr ::= Expr MulOp Expr prec mul + Gotos + MulOp : 9 + AddOp : 10 + State 13 [13.0] + Kernel + [Expr ::= Expr · MulOp Expr, {"*", "/", "+", "-", EOI}] prec mul + [Expr ::= Expr · AddOp Expr, {"*", "/", "+", "-", EOI}] prec add + [Expr ::= Expr AddOp Expr ·, {"*", "/", "+", "-", EOI}] prec add + Added + [MulOp ::= · "*", {INT}] + [MulOp ::= · "/", {INT}] + [AddOp ::= · "+", {INT}] + [AddOp ::= · "-", {INT}] + Actions + "*" : ShiftPrefix 4 prec mul + "/" : ShiftPrefix 5 prec mul + "+" : Reduce Expr ::= Expr AddOp Expr prec add + "-" : Reduce Expr ::= Expr AddOp Expr prec add + EOI : Reduce Expr ::= Expr AddOp Expr prec add + Gotos + MulOp : 9 + AddOp : 10 diff --git a/bootstrap/test/hocc/Example_ml.hmh b/bootstrap/test/hocc/Example_ml.hmh new file mode 100644 index 000000000..c0c63ac61 --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.hmh @@ -0,0 +1,77 @@ +open Basis +open! Basis.Rudiments + +(* Specify the parser. `hocc ...` expands to a module implementation, `{ ... }`. *) +include hocc + left mul + token STAR "*" prec mul + token SLASH "/" prec mul + nonterm MulOp of Token.t ::= + | "*" -> STAR + | "/" -> SLASH + + left add < mul + token PLUS "+" prec add + token MINUS "-" prec add + nonterm AddOp of Token.t ::= + | "+" -> PLUS + | "-" -> MINUS + + token INT of Zint.t + nonterm Expr of Zint.t ::= + | e0:Expr op:MulOp e1:Expr prec mul -> + match op with + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () + | e0:Expr op:AddOp e1:Expr prec add -> + match op with + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () + | x:INT -> x + + token EOI + start Answer of Zint.t ::= + | e:Expr EOI -> e + +(* Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. *) +let tokenize s = + s |> String.split_rev ~f:(fun cp -> Codepoint.(cp = (of_char ' '))) + |> List.rev_filter ~f:(fun s -> not (String.is_empty s)) + |> List.rev_map ~f:(fun s -> + let open Token in + match s with + | "*" -> STAR + | "/" -> SLASH + | "+" -> PLUS + | "-" -> MINUS + | _ -> INT (Zint.of_string s) + ) + |> List.push Token.EOI + |> List.rev + +(* Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". *) +let calculate s = + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:(fun parser tok -> + let {status; _} as parser' = next tok parser in + let is_done = match status with + | Prefix -> false + | Accept _ + | Reject _ -> true + | _ -> not_reached () + in + parser', is_done + ) in + match status with + | Accept (Answer answer) -> answer + | Prefix -> halt "Partial input" + | Reject _ -> halt "Parse error" + | _ -> not_reached () + +let main () = + File.Fmt.stdout + |> Zint.pp (calculate "2 + 3 * 4") + |> ignore + +let _ = main () diff --git a/bootstrap/test/hocc/Example_ml.hmhi b/bootstrap/test/hocc/Example_ml.hmhi new file mode 100644 index 000000000..a53e297d3 --- /dev/null +++ b/bootstrap/test/hocc/Example_ml.hmhi @@ -0,0 +1,10 @@ +open! Basis +open! Basis.Rudiments + +(* Export the parser API so that alternatives to `calculate` can be implemented. `hocc` expands to a + * module signature. *) +include hocc + +val calculate: string -> Zint.t + (** Calculate the result of a simple arithmetic expression comprising non-negative integers and + `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces. *) diff --git a/bootstrap/test/hocc/Example_rno.expected.txt b/bootstrap/test/hocc/Example_rno.expected.txt index a94b6a5fd..4ffc9e26e 100644 --- a/bootstrap/test/hocc/Example_rno.expected.txt +++ b/bootstrap/test/hocc/Example_rno.expected.txt @@ -53,7 +53,7 @@ Non-terminals Follow: {"⊥"} Productions Answer ::= Expr EOI - start Answer' + start Answer' of Zint.t First: {INT} Follow: {"ε"} Productions diff --git a/bootstrap/test/hocc/Parse_a.hmh b/bootstrap/test/hocc/Parse_a.hmh index 495a5437c..f70cd0a4d 100644 --- a/bootstrap/test/hocc/Parse_a.hmh +++ b/bootstrap/test/hocc/Parse_a.hmh @@ -47,7 +47,7 @@ include hocc | epsilon -> () nonterm N7 of Unit.t ::= - | o:OP _:N1 N2 -> foo + | OP n:N1 _:N2 -> foo Code = { } diff --git a/bootstrap/test/hocc/dune b/bootstrap/test/hocc/dune index 5d24cacd5..b7805617c 100644 --- a/bootstrap/test/hocc/dune +++ b/bootstrap/test/hocc/dune @@ -248,6 +248,18 @@ (alias runtest) (action (diff Parse_error_hmhi.expected Parse_error_hmhi.out))) +(rule + (deps + (glob_files Binding_error.hmh*) + %{bin:hocc}) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Binding_error.out (run ./hocc_test %{bin:hocc} Binding_error -v))))) +(rule + (alias runtest) + (action (diff Binding_error.expected Binding_error.out))) + (rule (deps (glob_files Unused.hmh*) @@ -523,17 +535,53 @@ (deps (glob_files Example.hmh*) %{bin:hocc}) - (targets Example.out.txt) + (targets Example.out.txt Example.hmi Example.hm) (action (with-accepted-exit-codes (or 0 1) - (with-outputs-to Example.out (run ./hocc_test %{bin:hocc} Example -v -txt))))) + (with-outputs-to Example.out (run ./hocc_test %{bin:hocc} Example -v -txt -hm))))) (rule (alias runtest) (action (progn (diff Example.expected Example.out) - (diff Example.expected.txt Example.out.txt)))) + (diff Example.expected.txt Example.out.txt) + (diff Example.expected.hmi Example.hmi) + (diff Example.expected.hm Example.hm)))) + +(rule + (deps + (glob_files Example_b.hmh*) + %{bin:hocc}) + (targets Example_b.out.txt Example_b.hmi Example_b.hm) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example_b.out (run ./hocc_test %{bin:hocc} Example_b -v -hm))))) +(rule + (alias runtest) + (action + (progn + (diff Example_b.expected Example_b.out) + (diff Example_b.expected.hmi Example_b.hmi) + (diff Example_b.expected.hm Example_b.hm)))) + +(rule + (deps + (glob_files Example_c.hmh*) + %{bin:hocc}) + (targets Example_c.out.txt Example_c.hmi Example_c.hm) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example_c.out (run ./hocc_test %{bin:hocc} Example_c -v -hm))))) +(rule + (alias runtest) + (action + (progn + (diff Example_c.expected Example_c.out) + (diff Example_c.expected.hmi Example_c.hmi) + (diff Example_c.expected.hm Example_c.hm)))) (rule (deps @@ -551,6 +599,27 @@ (diff Example_rno.expected Example_rno.out) (diff Example_rno.expected.txt Example_rno.out.txt)))) +(rule + (deps + (glob_files Example_ml.hmh*) + %{bin:hocc}) + (targets Example_ml.out.txt Example_ml.mli Example_ml.ml) + (action + (with-accepted-exit-codes + (or 0 1) + (with-outputs-to Example_ml.out (run ./hocc_test %{bin:hocc} Example_ml -txt -ml))))) +(rule + (alias runtest) + (action + (progn + (diff Example_ml.expected.txt Example_ml.out.txt) + (diff Example_ml.expected.mli Example_ml.mli) + (diff Example_ml.expected.ml Example_ml.ml)))) +(tests + (names + Example_ml) + (libraries Basis)) + (rule (deps (glob_files Hocc.hmh*) diff --git a/bootstrap/test/hocc/help_a.expected b/bootstrap/test/hocc/help_a.expected index 2f8017bb0..9d739160a 100644 --- a/bootstrap/test/hocc/help_a.expected +++ b/bootstrap/test/hocc/help_a.expected @@ -5,12 +5,11 @@ Parameters: -h[elp] : Print command usage and exit. -v[erbose] : Print progress information during parser generation. -txt | -text : Write a detailed automoton description in plain text - format to "/hocc/.txt". + format to "/hocc/.txt". -html : Write a detailed automoton description in internally - hyperlinked HTML format to - "/hocc/.html". + hyperlinked HTML format to "/hocc/.html". -hmh | -hocc : Write a complete grammar specification in hocc format to - "/hocc/.hmh", but with all non-terminal + "/hocc/.hmh", but with all non-terminal types and reduction code omitted. -a[lgorithm] : Use the specified orithm for generating an automoton. Defaults to lr1. @@ -26,9 +25,9 @@ Parameters: -r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults to yes. -hm | -hemlock : Generate a Hemlock-based parser implementation and write - it to "/.hm[i]". + it to "/.hm[i]". -ml | -ocaml : Generate an OCaml-based parser implementation and write it - to "/.ml[i]". This is brittle + to "/.ml[i]". This is brittle functionality intended only for Hemlock bootstrapping. -s[rc] : Path and module name of input source, where inputs match ".hmh[i]" and "" comprises the source directory diff --git a/bootstrap/test/hocc/help_b.expected b/bootstrap/test/hocc/help_b.expected index 2f8017bb0..9d739160a 100644 --- a/bootstrap/test/hocc/help_b.expected +++ b/bootstrap/test/hocc/help_b.expected @@ -5,12 +5,11 @@ Parameters: -h[elp] : Print command usage and exit. -v[erbose] : Print progress information during parser generation. -txt | -text : Write a detailed automoton description in plain text - format to "/hocc/.txt". + format to "/hocc/.txt". -html : Write a detailed automoton description in internally - hyperlinked HTML format to - "/hocc/.html". + hyperlinked HTML format to "/hocc/.html". -hmh | -hocc : Write a complete grammar specification in hocc format to - "/hocc/.hmh", but with all non-terminal + "/hocc/.hmh", but with all non-terminal types and reduction code omitted. -a[lgorithm] : Use the specified orithm for generating an automoton. Defaults to lr1. @@ -26,9 +25,9 @@ Parameters: -r[esolve] (yes|no) : Control whether conflict resolution is enabled. Defaults to yes. -hm | -hemlock : Generate a Hemlock-based parser implementation and write - it to "/.hm[i]". + it to "/.hm[i]". -ml | -ocaml : Generate an OCaml-based parser implementation and write it - to "/.ml[i]". This is brittle + to "/.ml[i]". This is brittle functionality intended only for Hemlock bootstrapping. -s[rc] : Path and module name of input source, where inputs match ".hmh[i]" and "" comprises the source directory diff --git a/doc/tools/hocc.md b/doc/tools/hocc.md index e306a01f8..16340ae17 100644 --- a/doc/tools/hocc.md +++ b/doc/tools/hocc.md @@ -22,7 +22,7 @@ interoperating with [Hemlock](https://github.com/BranchTaken/Hemlock) rather tha supports arbitrarily many directed acyclic precedence graphs. Given this more powerful conflict resolution mechanism, `hocc` refuses to generate parsers for ambiguous grammars. - `hocc` supports an automated error recovery algorithm [^diekmann2020] based on minimum-cost repair - sequences. + sequences. [XXX Not implemented.] ## Command usage @@ -33,11 +33,11 @@ Parameters: - `-h[elp]`: Print command usage and exit. - `-v[erbose]`: Print progress information during parser generation. - `-txt` | `-text`: Write a detailed automoton description in plain text format to - `/hocc/.txt`. + `/hocc/.txt`. - `-html`: Write a detailed automoton description in internally hyperlinked HTML format to - `/hocc/.html`. + `/hocc/.html`. - `-hmh` | `-hocc`: Write a complete grammar specification in `hocc` format to - `/hocc/.hmh`, but with all non-terminal types and reduction code omitted. + `/hocc/.hmh`, but with all non-terminal types and reduction code omitted. - `-a[lgorithm] `: Use the specified ``orithm for generating an automoton. Defaults to `lr1`. + `lr1`: Canonical LR(1) automoton [^knuth1965]. @@ -48,9 +48,9 @@ Parameters: + `lalr1`: LALR(1) automoton [^deremer1969]. - `-r[esolve] (yes|no)`: Control whether conflict resolution is enabled. Defaults to `yes`. - `-hm` | `-hemlock`: Generate a Hemlock-based parser implementation and write it to - `/.hm[i]`. + `/.hm[i]`. - `-ml` | `-ocaml`: Generate an OCaml-based parser implementation and write it to - `/.ml[i]`. This is brittle functionality intended only for Hemlock + `/.ml[i]`. This is brittle functionality intended only for Hemlock bootstrapping. - `-s[rc] `: Path and module name of input source, where inputs match `.hmh[i]` and `` comprises the source directory and module name, `[/]`. @@ -71,12 +71,12 @@ Example invocations: ## Parser specification The `hocc` specification grammar is layered onto Hemlock's grammar via the addition of several -keywords: +keywords and one operator: - Parser: `hocc` - Symbols: + [Tokens](#tokens): `token` - + [Non-terminals](#non-terminals): `nonterm`, `start` + + [Non-terminals](#non-terminals): `nonterm`, `start`, `::=` + [Productions](#productions): `epsilon` - [Precedence](#precedence): `neutral`, `left`, `right`, `prec` @@ -441,7 +441,7 @@ open import Basis # module signature. include hocc -calulate: string -> zint +calculate: string -> zint [@@doc "Calculate the result of a simple arithmetic expression comprising non-negative integers and `+`, `-`, `*`, and `/` operators. Tokens must be separated by one or more spaces."] ``` @@ -471,12 +471,14 @@ include hocc nonterm Expr of Zint.t ::= | e0:Expr op:MulOp e1:Expr prec mul -> match op with - | MulOp STAR -> Zint.(e0 * e1) - | MulOp SLASH -> Zint.(e0 / e1) + | STAR -> Zint.(e0 * e1) + | SLASH -> Zint.(e0 / e1) + | _ -> not_reached () | e0:Expr op:AddOp e1:Expr prec add -> match op with - | AddOp PLUS -> Zint.(e0 + e1) - | AddOp MINUS -> Zint.(e0 - e1) + | PLUS -> Zint.(e0 + e1) + | MINUS -> Zint.(e0 - e1) + | _ -> not_reached () | x:INT -> x token EOI @@ -485,8 +487,8 @@ include hocc # Tokenize `s`, e.g. "2 + 3 * 4", and append an `EOI` token. tokenize s = - s |> String.split_rev ~f:(fn cp -> Codepoint.O.(cp = ' ')) - |> List.rev_filter ~f:(fn s -> String.length s <> 0) + s |> String.split_rev ~f:(fn cp -> Codepoint.(cp = ' ')) + |> List.rev_filter ~f:(fn s -> not (String.is_empty s)) |> List.rev_map ~f:fn s -> let open Token match s with @@ -500,18 +502,19 @@ tokenize s = # Calculate the result of the arithmetic expression expressed in `s`, e.g. "2 + 3 * 4". calculate s = - List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> - let parser' = Start.Answer.next tok parser - let done = match status parser' with + let {status; _} = List.fold_until (tokenize s) ~init:Start.Answer.boi ~f:fn parser tok -> + let {status; _} as parser' = Start.Answer.next tok parser + let done = match status with | Prefix -> false | Accept _ | Error _ -> true + | _ -> not_reached () parser', done - |> - function - | Accept answer -> answer - | Prefix _ -> halt "Partial input" - | Error _ -> halt "Parse error" + match status with + | Accept (Answer answer) -> answer + | Prefix _ -> halt "Partial input" + | Error _ -> halt "Parse error" + | _ -> not_reached () ``` To generate Hemlock code from the above inputs, run `hocc -hm -s Example`. @@ -534,12 +537,25 @@ parser states can be used as persistent reusable snapshots. ```hemlock { Spec = { + Algorithm = { + type t: t = + | Lr1 [@doc "LR(1) algorithm."] + | Ielr1 [@doc "IELR(1) algorithm."] + | Pgm1 [@doc "PGM(1) algorithm."] + | Lalr1 [@doc "LALR(1) algorithm."] + + include IdentifiableIntf.S with type t := t + } + + algorithm: Algorithm.t + [@@doc "Algorithm used to generate parser."] + Assoc = { type t: t = | Left | Right - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } Prec = { @@ -547,10 +563,11 @@ parser states can be used as persistent reusable snapshots. index: uns # Index in `precs` array. name: string assoc: option Assoc.t - doms: Ordset.t uns # Indices in `precs` array of dominator precedences. + doms: Ordset.t uns Uns.cmper_witness (* Indices in `precs` array of dominator + * precedences. *) } - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } precs: array Prec.t @@ -563,12 +580,10 @@ parser states can be used as persistent reusable snapshots. lhs_index: uns rhs_indexes: array uns prec: option Prec.t - reduction: uns # Index of corresponding reduction function in `reductions` array. + callback: uns # Index of reduction callback in `Stack.Reduction.callbacks`. } - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } prods: array Prod.t @@ -587,9 +602,7 @@ parser states can be used as persistent reusable snapshots. follow: Ordset.t uns Uns.cmper_witness } - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } symbols: array Symbol.t @@ -602,17 +615,7 @@ parser states can be used as persistent reusable snapshots. dot: uns } - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e - } - - Lr0Itemset = { - type t: t = Ordset.t Lr0Item.t Lr0Item.cmper_witness - - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } Lr1Item = { @@ -621,17 +624,13 @@ parser states can be used as persistent reusable snapshots. follow: Ordset.t uns Uns.cmper_witness } - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } Lr1Itemset = { type t: t = Ordmap.t Lr0Item.t Lr1Item.t Lr0Item.cmper_witness - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } Lr1ItemsetClosure = { @@ -641,9 +640,7 @@ parser states can be used as persistent reusable snapshots. added: Lr1Itemset.t } - hash_map: t -> Hash.State.t -> Hash.State.t - cmp: t -> t -> Cmp.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } Action = { @@ -652,7 +649,7 @@ parser states can be used as persistent reusable snapshots. | ShiftAccept of uns # `states` index. | Reduce of uns # `prods` index. - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } State = { @@ -662,7 +659,7 @@ parser states can be used as persistent reusable snapshots. gotos: Map.t uns uns Uns.cmper_witness } - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } states: array State.t @@ -673,13 +670,13 @@ parser states can be used as persistent reusable snapshots. Token = { type t: t = # Built-in tokens with reserved names. - | EPSILON of unit - | PSEUDO_END of unit + | EPSILON # ε + | PSEUDO_END # ⊥ # One variant per `token` statement, e.g. `A` and `B`. | A of TypeA.t | B of TypeB.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t spec: t -> Spec.Symbol.t } @@ -687,10 +684,12 @@ parser states can be used as persistent reusable snapshots. Nonterm = { type t: t = # One variant per `nonterm`/`start` statement, e.g. `S` and `N`. - | S of TypeS.t | N of TypeN.t + | S of TypeS.t + # One variant per start symbol wrapper. + | S' of TypeS.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t spec: t -> Spec.Symbol.t } @@ -700,7 +699,7 @@ parser states can be used as persistent reusable snapshots. | Token of Token.t | Nonterm of Nonterm.t - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t spec: t -> Spec.Symbol.t } @@ -708,40 +707,63 @@ parser states can be used as persistent reusable snapshots. State = { type t: t = uns - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t spec: t -> Spec.State.t } - type stack_elm: stack_elm = { - symbol: Symbol.t - symbol_index: uns - state_index: uns - } - type stack: stack = list stack_elm - type reduction: reduction = stack -> stack + Stack = { + module Elm : sig + type t: t = { + symbol: Symbol.t; + state: State.t; + } + + include IdentifiableIntf.S with type t := t + end + + type t: t = Elm.t list + + pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + fmt >e: ?alt:bool -> ?width:uns -> t -> Fmt.Formatter e >e-> Fmt.Formatter e + + Reduction = { + type stack: stack = t + type t: t + type callback: callback = stack -> Symbol.t * stack - reductions: array reduction - [@@doc "Array of reductions, where each element's `index` field corresponds to the element's - array index."] + include IdentifiableIntf.S with type t := t + + callbacks: array callback + [@@doc "Array of reduction callback functions containing embedded parser code."] + + callback: t -> callback + } + + shift: symbol:Symbol.t -> state:State.t -> t -> t + [@@doc "Perform a shift."] + + reduce: reduction:Reduction.t -> t -> t + [@@doc "Perform a reduction."] + } Status = { type t: t = # `feed`/`step` may produce these variants; `next` fast-forwards over them. - | ShiftPrefix of (Token.t, State.t) - | ShiftAccept of (Token.t, State.t) - | Reduce of reduction + | ShiftPrefix of Token.t * State.t + | ShiftAccept of Token.t * State.t + | Reduce of Token.t * Stack.Reduction.t # Common variants. | Prefix # Valid parse prefix; more input needed. | Accept of Nonterm.t # Successful parse result. | Reject of Token.t # Syntax error due to unexpected token. - pp >e: t -> Fmt.Formatter e >e-> Fmt.Formatter e + include IdentifiableIntf.S with type t := t } type t: t = { - stack: stack - status: status + stack: Stack.t + status: Status.t } Start = { @@ -753,7 +775,7 @@ parser states can be used as persistent reusable snapshots. feed: Token.t -> t -> t [@@doc "`feed token t` returns a result with status in {`ShiftPrefix`, `ShiftAccept`, - `Reject`}. `t.status` must be `Prefix`."] + `Reduce`, `Reject`}. `t.status` must be `Prefix`."] step: t -> t [@@doc "`step t` returns the result of applying one state transition to `t`. `t.status` must @@ -910,7 +932,7 @@ hocc token USCORE "_" # Token alias - token STRING + token ISTRING # Punctuation/separators token COLON_COLON_EQ "::=" @@ -944,13 +966,17 @@ hocc # End of input, used to terminate start symbols token EOI - nonterm Ident ::= UIDENT | CIDENT | "_" + nonterm Uident ::= UIDENT + + nonterm Cident ::= CIDENT + + nonterm Ident ::= Uident | Cident | "_" nonterm PrecsTl ::= - | "," UIDENT PrecsTl + | "," Uident PrecsTl | epsilon - nonterm Precs ::= UIDENT PrecsTl + nonterm Precs ::= Uident PrecsTl nonterm PrecRels ::= | "<" Precs @@ -958,23 +984,23 @@ hocc nonterm PrecType ::= "neutral" | "left" | "right" - nonterm Prec ::= PrecType UIDENT PrecRels + nonterm Prec ::= PrecType Uident PrecRels - nonterm OfType ::= "of" CIDENT "." UIDENT + nonterm OfType ::= "of" Cident "." Uident nonterm OfType0 ::= | OfType | epsilon nonterm PrecRef ::= - | "prec" UIDENT + | "prec" Uident | epsilon nonterm TokenAlias ::= - | STRING + | ISTRING | epsilon - nonterm Token ::= "token" CIDENT TokenAlias OfType0 PrecRef + nonterm Token ::= "token" Cident TokenAlias OfType0 PrecRef nonterm Sep ::= LINE_DELIM | ";" | "|" @@ -1005,13 +1031,13 @@ hocc | Delimited CodeTl | CODE_TOKEN CodeTl - nonterm ProdParamType ::= - | CIDENT - | STRING + nonterm ProdParamSymbol ::= + | Cident + | ISTRING nonterm ProdParam ::= - | Ident ":" ProdParamType - | ProdParamType + | Ident ":" ProdParamSymbol + | ProdParamSymbol nonterm ProdParamsTl ::= | ProdParam ProdParamsTl @@ -1045,8 +1071,8 @@ hocc nonterm NontermType ::= "nonterm" | "start" nonterm Nonterm ::= - | NontermType CIDENT PrecRef "::=" Prods - | NontermType CIDENT OfType PrecRef "::=" Reductions + | NontermType Cident PrecRef "::=" Prods + | NontermType Cident OfType PrecRef "::=" Reductions nonterm Stmt ::= | Prec