Skip to content

Commit

Permalink
Begin implementing hocc
Browse files Browse the repository at this point in the history
- Add hocc syntax highlighting for kakoune.
  There's probably a way to avoid the near-complete duplication with the Hemlock
  syntax highlighting configuration, but it may involve shell evaluation of all
  highlighter additions.
- Add `Os.argv`. Command line parameters may not be UTF-8-encoded, so it's not
  safe to use OCaml's `Sys.argv`.

Finish implementing command line parameter validation

Begin implementing `hocc` specification data structures

Implement `hocc` scanner

Revise `hocc` grammar, define corresponding syntax tree types

Enable `hocc` syntax highlighting for the `.hmhi` extension

Misc

- Revise `hocc` grammar to use ε productions where appropriate.
- Implement hmhi parsing.
- Add `Hmc.Scan.cursor` as a convenience function.
- Revise `hocc` grammar productions to use right recursion, and to support
  Hemlock code sequences well enough to pass muster for `hocc`'s purposes.

Add `Parse.Error`, improve error reporting

Add ctx type for parser functions

Incorporate token malformations into parse error reporting

Add `Hmc.Scan.AbstractToken.malformations`

This convenience function extracts malformations, which centralizes the verbose
exhaustive pattern matching necessary to do this reliably.

Revise `hocc` command line usage to make parser generation optional

Start implementing `.hmh` parsing

Finish initial implementation of `hocc` parser (doesn't work correctly)

Implement parse tree pretty printing, start implementing tracing

Fix `hocc` parsing bugs (successfully parses `Example1.hmh[i]`)

Add `hocc` parsing tests, fix grammar/implementation bugs

Make production parameter lexical bindings optional

Add more `hocc` parsing tests

Add more `hocc` parser tests

Add `Hocc` grammar parsing test

Enhance `hocc` grammar to omit `-> ()` for unit-type non-terminals

Add token alias syntax

`hocc` documentation edits

Expand `hocc` documentation

Finish initial draft of `hocc.md`

Add `hocc` parse error tests

Add remaining `hocc` parser tests

Add Lyken grammar spec (two forms, one of which uses DAG prec by construction)

Remove `UIDENT` from `ProdParamType`

Restrict precedence relationships to `<` with previously defined precedences

Remove the `-dot`/`-graphviz` command line options, since cycles are no longer
possible and the textual precedence relationships closely correspond to the
graph.

Add `Io` module

Implement precedence validation/closure

Incorporate `stderr` into `Io.t` and add `Io.fatal`

Implement html precedence reporting

Implement hocc report generation

Make `hocc` scanner lazy

Create the `hocc` report directory if needed

Expose `Hmc.Scan.ConcretToken.t` definition to enable pattern matching

Implement various modules on which `Spec` depends

Implement more of `Spec.t` initialization

Reduction initialization still isn't implemented, nor any reporting besides
`prec`.

Implement token/nonterm report generation

Misc refactoring, start of reduction procesessing

Combine duplicate code into `QualifiedType`

Encapsulate `Reduction.param` as `Reduction.Param.t`

Enhance first pass to gather qualified type in addition to index

Implement `Reduction.Params`

Refactor production parameter processing to be universally usable

Implement reduction processing

Synthesize `PSEUDO_END`, prep for start symbol wrapper synthesis

Synthesize wrappers for start symbols

Implement symbol first/follow set closure

Implement `Lr[01]Item{,set}`

Start implementing LR(1) item set closure

Implement `Lr1ItemsetClosure.compat_ident`

Implement `Lr1ItemsetClosure.compat_weak`

Finish implementing `Lr1ItemsetClosure`

Implement LR(1) item set closure generation
  • Loading branch information
Jason Evans committed Apr 19, 2022
1 parent 8dcbc94 commit 3236a7e
Show file tree
Hide file tree
Showing 113 changed files with 8,668 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# https://editorconfig.org/

[*.{hm,hmi}]
[*.{hm,hmi,hmh,hmhi}]
indent_style = space
indent_size = 4
tab_width = 8
Expand Down
File renamed without changes.
5 changes: 2 additions & 3 deletions bootstrap/bin/hmc.ml → bootstrap/bin/hmc/hmc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,9 @@ let scan_file path =
()

let _ =
match Array.length Sys.argv with
match Array.length Os.argv with
| 0L | 1L -> halt "hmc usage: hmc <path>"
| _ -> begin
let path_str = Array.get 1L Sys.argv in
let path = Path.of_string path_str in
let path = Path.of_bytes (Bytes.Slice.init (Array.get 1L Os.argv)) in
scan_file path
end
11 changes: 11 additions & 0 deletions bootstrap/bin/hocc/assoc.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
open Basis

type t =
| Left
| Right

let pp t formatter =
formatter |> Fmt.fmt (match t with
| Left -> "Left"
| Right -> "Right"
)
7 changes: 7 additions & 0 deletions bootstrap/bin/hocc/assoc.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
open Basis

type t =
| Left
| Right

include FormattableIntf.SMono with type t := t
236 changes: 236 additions & 0 deletions bootstrap/bin/hocc/conf.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
open! Basis
include Basis.Rudiments

type algorithm =
| LR1Compact
| LR1Canonical

let pp_algorithm algorithm formatter =
formatter |> Fmt.fmt (match algorithm with
| LR1Compact -> "LR1Compact"
| LR1Canonical -> "LR1Canonical"
)

type t = {
verbose: bool;
text: bool;
html: bool;
hocc: bool;
algorithm: algorithm;
hemlock: bool;
ocaml: bool;
srcdir_opt: Path.t option;
module_opt: Path.Segment.t option;
dstdir_opt: Path.t option;
}

let pp {verbose; text; html; hocc; algorithm; hemlock; ocaml; srcdir_opt; module_opt; dstdir_opt}
formatter =
formatter
|> Fmt.fmt "{verbose=" |> Bool.pp verbose
|> Fmt.fmt "; text=" |> Bool.pp text
|> Fmt.fmt "; html=" |> Bool.pp html
|> Fmt.fmt "; hocc=" |> Bool.pp hocc
|> Fmt.fmt "; algorithm=" |> pp_algorithm algorithm
|> Fmt.fmt "; hemlock=" |> Bool.pp hemlock
|> Fmt.fmt "; ocaml=" |> Bool.pp ocaml
|> Fmt.fmt "; srcdir_opt=" |> (Option.pp Path.pp) srcdir_opt
|> Fmt.fmt "; module_opt=" |> (Option.pp Path.Segment.pp) module_opt
|> Fmt.fmt "; dstdir_opt=" |> (Option.pp Path.pp) dstdir_opt
|> Fmt.fmt "}"

let default = {
verbose=false;
text=false;
html=false;
hocc=false;
algorithm=LR1Compact;
hemlock=false;
ocaml=false;
srcdir_opt=None;
module_opt=None;
dstdir_opt=None;
}

let usage error =
let exit_code, formatter = match error with
| false -> 0, File.Fmt.stdout
| true -> 1, File.Fmt.stderr
in
formatter
|> Fmt.fmt {|hocc usage: hocc <parameters>

Parameters:
-h[elp] : Print command usage and exit.
-v[erbose] : Print progress information during parser generation.
-txt | -text : Write a detailed automoton description in plain text format
to "<outdir>/hocc/<basename>.txt".
-html : Write a detailed automoton description in internally
hyperlinked HTML format to "<outdir>/hocc/<basename>.html".
-hmh | -hocc : Write a complete grammar specification in hocc format to
"<outdir>/hocc/<basename>.hmh", but with all non-terminal
types and reduction code omitted.
-c[anonical] : Generate a canonical LR(1) parser rather than a compact
LR(1) parser.
-hm | -hemlock : Generate a Hemlock-based parser implementation and write it
to "<outdir>/<basename>.hm[i]".
-ml | -ocaml : Generate an OCaml-based parser implementation and write it
to "<outdir>/<basename>.ml[i]". This is brittle
functionality intended only for Hemlock bootstrapping.
-s[rc] <src> : Path and module name of input source, where inputs match
"<src>.hmh[i]" and "<src>" comprises the source directory
and module name, "[<srcdir>/]<module>".
-d[stdir] <dstdir> : Path to directory in which to place generated output, such
that output file paths match "<dstdir>/[hocc/]<module>.*".
Defaults to "<srcdir>".
|}
|> ignore;
Stdlib.exit exit_code

let is_segment_cident segment =
let rec cont cursor past = begin
match String.C.Cursor.(<) cursor past with
| false -> true
| true -> begin
let cp, cursor' = String.C.Cursor.next cursor in
match cp with
| cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past
| cp when Codepoint.(cp >= of_char 'a' && cp <= of_char 'z') -> cont cursor' past
| cp when Codepoint.(cp >= of_char '0' && cp <= of_char '9') -> cont cursor' past
| cp when Codepoint.(cp = of_char '_') -> cont cursor' past
| cp when Codepoint.(cp = of_char '\'') -> cont cursor' past
| _ -> false
end
end in
let rec start cursor past = begin
match String.C.Cursor.(<) cursor past with
| false -> false
| true -> begin
let cp, cursor' = String.C.Cursor.next cursor in
match cp with
| cp when Codepoint.(cp = of_char '_') -> start cursor' past
| cp when Codepoint.(cp >= of_char 'A' && cp <= of_char 'Z') -> cont cursor' past
| _ -> false
end
end in
match Path.Segment.to_string segment with
| None -> false
| Some s -> begin
let sslice = String.C.Slice.of_string s in
let base = String.C.Slice.base sslice in
let past = String.C.Slice.past sslice in
start base past
end

let of_argv argv =
let arg_arg argv i = begin
let i' = succ i in
match i' < Array.length argv with
| false -> begin
let arg = Bytes.to_string_replace (Array.get i argv) in
File.Fmt.stderr |> Fmt.fmt "hocc: " |> Fmt.fmt arg |> Fmt.fmt " argument missing\n"
|> ignore;
usage true
end
| true -> Array.get i' argv
end in
let rec f t argv i = begin
match i < Array.length argv with
| false -> t
| true -> begin
let arg_bytes = Array.get i argv in
let arg_string = Bytes.to_string_replace arg_bytes in
match arg_string with
| "-help" | "-h" -> usage false
| "-verbose" | "-v" -> f {t with verbose=true} argv (succ i)
| "-txt" | "-text" -> f {t with text=true} argv (succ i)
| "-html" -> f {t with html=true} argv (succ i)
| "-hmh" | "-hocc" -> f {t with hocc=true} argv (succ i)
| "-canonical" | "-c" -> f {t with algorithm=LR1Canonical} argv (succ i)
| "-hm" | "-hemlock" -> f {t with hemlock=true} argv (succ i)
| "-ml" | "-ocaml" -> f {t with ocaml=true} argv (succ i)
| "-src" | "-s" -> begin
let path = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in
let dirname, basename_opt = Path.split path in
let srcdir_opt = match Path.is_empty dirname with
| true -> None
| false -> Some dirname
in
let module_opt = match basename_opt with
| None -> begin
File.Fmt.stderr
|> Fmt.fmt "hocc: Invalid source: "
|> Path.pp path
|> Fmt.fmt "\n"
|> ignore;
usage true
end
| Some segment -> begin
match is_segment_cident segment with
| false -> begin
File.Fmt.stderr |> Fmt.fmt "hocc: Invalid source module name: "
|> Path.Segment.pp segment |> Fmt.fmt "\n" |> ignore;
usage true
end
| true -> Some segment
end
in
f {t with srcdir_opt; module_opt} argv (i + 2L)
end
| "-dstdir" | "-d" -> begin
let dstdir = Path.of_bytes (Bytes.Slice.init (arg_arg argv i)) in
f {t with dstdir_opt=Some dstdir} argv (i + 2L)
end
| _ -> begin
File.Fmt.stderr
|> Fmt.fmt "hocc: Invalid command line parameter: "
|> String.pp arg_string
|> Fmt.fmt "\n"
|> ignore;
usage true
end
end
end in
let t = f default argv 1L in
match t.module_opt with
| None -> begin
File.Fmt.stderr |> Fmt.fmt "hocc: Source unspecified\n" |> ignore;
usage true
end
| Some _ -> t

let verbose {verbose; _} =
verbose

let text {text; _} =
text

let html {html; _} =
html

let hocc {hocc; _} =
hocc

let algorithm {algorithm; _} =
algorithm

let hemlock {hemlock; _} =
hemlock

let ocaml {ocaml; _} =
ocaml

let srcdir {srcdir_opt; _} =
match srcdir_opt with
| None -> Path.of_string "."
| Some srcdir -> srcdir

let module_ {module_opt; _} =
match module_opt with
| None -> not_reached ()
| Some m -> m

let dstdir {dstdir_opt; _} =
match dstdir_opt with
| None -> Path.of_string "."
| Some dstdir -> dstdir
24 changes: 24 additions & 0 deletions bootstrap/bin/hocc/conf.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
open Basis

type algorithm =
| LR1Compact
| LR1Canonical

val pp_algorithm: algorithm -> (module Fmt.Formatter) -> (module Fmt.Formatter)

type t

include FormattableIntf.SMono with type t := t

val of_argv: Bytes.t array -> t

val verbose: t -> bool
val text: t -> bool
val html: t -> bool
val hocc: t -> bool
val algorithm: t -> algorithm
val hemlock: t -> bool
val ocaml: t -> bool
val srcdir: t -> Path.t
val module_: t -> Path.Segment.t
val dstdir: t -> Path.t
7 changes: 7 additions & 0 deletions bootstrap/bin/hocc/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
(executables
(names hocc)
(libraries Basis Hmc))

(install
(section bin)
(files (hocc.exe as hocc)))
59 changes: 59 additions & 0 deletions bootstrap/bin/hocc/hocc.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
open Basis
open! Basis.Rudiments

let parse_hmhi (Io.{hmhi; _} as io) =
match hmhi with
| Some text -> begin
let scanner = Scan.init text in
let io =
io.log
|> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path text)) |> Fmt.fmt "\n"
|> Io.with_log io in
let _scanner', hmhi = Parse.hmhi scanner in
match hmhi with
| Error errors -> begin
List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error ->
File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore
);
Stdlib.exit 1
end
| Ok hmhi -> io, Some hmhi
end
| None -> io, None

let parse_hmh (Io.{hmh; _} as io) =
let scanner = Scan.init hmh in
let io =
io.log
|> Fmt.fmt "hocc: Parsing " |> Path.pp (Option.value_hlt (Text.path hmh)) |> Fmt.fmt "\n"
|> Io.with_log io in
let _scanner', hmh = Parse.hmh scanner in
match hmh with
| Error errors -> begin
List.iter (List.sort errors ~cmp:Parse.Error.cmp) ~f:(fun error ->
File.Fmt.stderr |> Parse.Error.fmt ~alt:true error |> ignore
);
Stdlib.exit 1
end
| Ok hmh -> io, hmh

let _ =
let conf = Conf.of_argv Os.argv in
let io = Io.init conf in
let io, _hmhi_opt = parse_hmhi io in
let io, hmh = parse_hmh io in
let io, spec = Spec.init conf io hmh in
let io = match Conf.text conf with
| false -> io
| true -> Spec.to_txt conf io spec
in
let io = match Conf.html conf with
| false -> io
| true -> Spec.to_html conf io spec
in
let io = match Conf.hocc conf with
| false -> io
| true -> Spec.to_hocc io spec
in
let _io = Io.fini conf io in
()
Loading

0 comments on commit 3236a7e

Please sign in to comment.