Skip to content

Commit

Permalink
Merge {Abstract,Concrete}Token into Token
Browse files (browse the repository at this point in the history)
This data structure simplification removes unnecessary complexity dating to
before the finalization of hocc's generated parser API.
  • Loading branch information
Jason Evans committed Aug 20, 2024
1 parent e877ac0 commit 41c0082
Show file tree
Hide file tree
Showing 26 changed files with 8,159 additions and 7,366 deletions.
12 changes: 4 additions & 8 deletions bootstrap/bin/hmc/hmc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@ open Hmc

let scan_file path =
let rec fn scanner = begin
let scanner', ctok = Scan.next scanner in
let atok = Scan.ConcreteToken.atok ctok in
let source = Scan.ConcreteToken.source ctok in
let scanner', tok = Scan.next scanner in
File.Fmt.stdout
|> Fmt.fmt " "
|> Source.Slice.pp source
|> Fmt.fmt " : "
|> Scan.AbstractToken.pp atok
|> Scan.Token.pp tok
|> Fmt.fmt "\n"
|> ignore;
match atok with
| Scan.AbstractToken.Tok_end_of_input -> ()
match tok with
| Scan.Token.Tok_end_of_input _ -> ()
| _ -> fn scanner'
end in
let () = match File.of_path path with
Expand Down
101 changes: 45 additions & 56 deletions bootstrap/bin/hocc/Parse.hmh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ module Error = struct
{source=Scan.Token.source token_; msg}

let init_mal mal =
let open Hmc.Scan.AbstractToken.Rendition.Malformation in
let open Hmc.Scan.Token.Rendition.Malformation in
{source=source mal; msg=description mal}

let init_scanner scanner msg =
Expand Down Expand Up @@ -641,60 +641,57 @@ let rec scan scanner =
match Scan.Token.malformations scan_token with
| [] -> begin
let token_opt = match scan_token with
| Scan.Token.HmcToken {atok; _} -> begin
match atok with
| Tok_whitespace
| Tok_hash_comment
| Scan.Token.HmcToken tok -> begin
match tok with
| Tok_whitespace _
| Tok_hash_comment _
| Tok_paren_comment _ -> None (* Swallow. *)
| Tok_uident _ -> Some (Token.UIDENT (UIDENT {token_=scan_token}))
| Tok_cident _ -> Some (Token.CIDENT (CIDENT {token_=scan_token}))
| Tok_uscore -> Some (Token.USCORE (USCORE {token_=scan_token}))
| Tok_uscore _ -> Some (Token.USCORE (USCORE {token_=scan_token}))
| Tok_istring _ -> Some (Token.ISTRING (ISTRING {token_=scan_token}))
| Tok_of -> Some (Token.OF (OF {token_=scan_token}))
| Tok_colon -> Some (Token.COLON (COLON {token_=scan_token}))
| Tok_dot -> Some (Token.DOT (DOT {token_=scan_token}))
| Tok_arrow -> Some (Token.ARROW (ARROW {token_=scan_token}))
| Tok_bar -> Some (Token.BAR (BAR {token_=scan_token}))
| Tok_lt -> Some (Token.LT (LT {token_=scan_token}))
| Tok_comma -> Some (Token.COMMA (COMMA {token_=scan_token}))
| Tok_semi -> Some (Token.SEMI (SEMI {token_=scan_token}))
| Tok_line_delim -> Some (Token.LINE_DELIM (LINE_DELIM {token_=scan_token}))
| Tok_of _ -> Some (Token.OF (OF {token_=scan_token}))
| Tok_colon _ -> Some (Token.COLON (COLON {token_=scan_token}))
| Tok_dot _ -> Some (Token.DOT (DOT {token_=scan_token}))
| Tok_arrow _ -> Some (Token.ARROW (ARROW {token_=scan_token}))
| Tok_bar _ -> Some (Token.BAR (BAR {token_=scan_token}))
| Tok_lt _ -> Some (Token.LT (LT {token_=scan_token}))
| Tok_comma _ -> Some (Token.COMMA (COMMA {token_=scan_token}))
| Tok_semi _ -> Some (Token.SEMI (SEMI {token_=scan_token}))
| Tok_line_delim _ -> Some (Token.LINE_DELIM (LINE_DELIM {token_=scan_token}))
| Tok_indent _ -> Some (Token.INDENT (INDENT {token_=scan_token}))
| Tok_dedent _ -> Some (Token.DEDENT (DEDENT {token_=scan_token}))
| Tok_lparen -> Some (Token.LPAREN (LPAREN {token_=scan_token}))
| Tok_rparen -> Some (Token.RPAREN (RPAREN {token_=scan_token}))
| Tok_lcapture -> Some (Token.LCAPTURE (LCAPTURE {token_=scan_token}))
| Tok_rcapture -> Some (Token.RCAPTURE (RCAPTURE {token_=scan_token}))
| Tok_lbrack -> Some (Token.LBRACK (LBRACK {token_=scan_token}))
| Tok_rbrack -> Some (Token.RBRACK (RBRACK {token_=scan_token}))
| Tok_larray -> Some (Token.LARRAY (LARRAY {token_=scan_token}))
| Tok_rarray -> Some (Token.RARRAY (RARRAY {token_=scan_token}))
| Tok_lcurly -> Some (Token.LCURLY (LCURLY {token_=scan_token}))
| Tok_rcurly -> Some (Token.RCURLY (RCURLY {token_=scan_token}))
| Tok_end_of_input -> Some (Token.EOI (EOI {token_=scan_token}))
| Tok_lparen _ -> Some (Token.LPAREN (LPAREN {token_=scan_token}))
| Tok_rparen _ -> Some (Token.RPAREN (RPAREN {token_=scan_token}))
| Tok_lcapture _ -> Some (Token.LCAPTURE (LCAPTURE {token_=scan_token}))
| Tok_rcapture _ -> Some (Token.RCAPTURE (RCAPTURE {token_=scan_token}))
| Tok_lbrack _ -> Some (Token.LBRACK (LBRACK {token_=scan_token}))
| Tok_rbrack _ -> Some (Token.RBRACK (RBRACK {token_=scan_token}))
| Tok_larray _ -> Some (Token.LARRAY (LARRAY {token_=scan_token}))
| Tok_rarray _ -> Some (Token.RARRAY (RARRAY {token_=scan_token}))
| Tok_lcurly _ -> Some (Token.LCURLY (LCURLY {token_=scan_token}))
| Tok_rcurly _ -> Some (Token.RCURLY (RCURLY {token_=scan_token}))
| Tok_end_of_input _ -> Some (Token.EOI (EOI {token_=scan_token}))
| _ -> Some (Token.OTHER_TOKEN (OTHER_TOKEN {token_=scan_token}))
end
| HoccToken {atok; _} -> begin
match atok with
| Tok_hocc -> Some (Token.HOCC (HOCC {token_=scan_token}))
| Tok_token -> Some (Token.TOKEN (TOKEN {token_=scan_token}))
| Tok_nonterm -> Some (Token.NONTERM (NONTERM {token_=scan_token}))
| Tok_start -> Some (Token.START (START {token_=scan_token}))
| Tok_epsilon -> Some (Token.EPSILON_ (EPSILON {token_=scan_token}))
| Tok_neutral -> Some (Token.NEUTRAL (NEUTRAL {token_=scan_token}))
| Tok_left -> Some (Token.LEFT (LEFT {token_=scan_token}))
| Tok_right -> Some (Token.RIGHT (RIGHT {token_=scan_token}))
| Tok_prec -> Some (Token.PREC (PREC {token_=scan_token}))
| Tok_colon_colon_eq ->
Some (Token.COLON_COLON_EQ (COLON_COLON_EQ {token_=scan_token}))
end in
| Tok_hocc _ -> Some (Token.HOCC (HOCC {token_=scan_token}))
| Tok_token _ -> Some (Token.TOKEN (TOKEN {token_=scan_token}))
| Tok_nonterm _ -> Some (Token.NONTERM (NONTERM {token_=scan_token}))
| Tok_start _ -> Some (Token.START (START {token_=scan_token}))
| Tok_epsilon _ -> Some (Token.EPSILON_ (EPSILON {token_=scan_token}))
| Tok_neutral _ -> Some (Token.NEUTRAL (NEUTRAL {token_=scan_token}))
| Tok_left _ -> Some (Token.LEFT (LEFT {token_=scan_token}))
| Tok_right _ -> Some (Token.RIGHT (RIGHT {token_=scan_token}))
| Tok_prec _ -> Some (Token.PREC (PREC {token_=scan_token}))
| Tok_colon_colon_eq _ -> Some (Token.COLON_COLON_EQ (COLON_COLON_EQ {token_=scan_token}))
in
match token_opt with
| Some token_ -> scanner, scan_token, token_, []
| None -> scan scanner
end
| mal :: [] -> begin
(* Try to pass e.g. 42L through as a u64 token to support OCaml syntax. *)
let u64_opt = match Hmc.Scan.AbstractToken.Rendition.Malformation.description mal with
let u64_opt = match Hmc.Scan.Token.Rendition.Malformation.description mal with
| "Invalid numerical constant" -> begin
let source = Scan.Token.source scan_token in
Hmc.Source.Slice.to_string source
Expand All @@ -709,12 +706,12 @@ let rec scan scanner =
in
match u64_opt with
| Some x -> begin
let rendition = Hmc.Scan.AbstractToken.Rendition.Constant x in
let ctok = Hmc.Scan.ConcreteToken.{
atok=Hmc.Scan.AbstractToken.Tok_u64 rendition;
source=Scan.Token.source scan_token
let rendition = Hmc.Scan.Token.Rendition.Constant x in
let tok = Hmc.Scan.Token.Tok_u64 {
source=Scan.Token.source scan_token;
u64=rendition
} in
let scan_token = Scan.Token.HmcToken ctok in
let scan_token = Scan.Token.HmcToken tok in
scanner, scan_token, Token.OTHER_TOKEN (OTHER_TOKEN {token_=scan_token}), []
end
| None -> scanner, scan_token, Token.OTHER_TOKEN (OTHER_TOKEN {token_=scan_token}), [mal]
Expand Down Expand Up @@ -1800,11 +1797,7 @@ let min_comment_indentation_of_hocc_block = function

let base_of_code code =
let of_token token_ =
let open Scan.Token in
let source = match token_ with
| HmcToken ctok -> ctok |> Hmc.Scan.ConcreteToken.source
| HoccToken ctok -> ctok |> Scan.ConcreteToken.source
in
let source = Scan.Token.source token_ in
Hmc.Source.Slice.base source
in
let rec of_delimited = function
Expand Down Expand Up @@ -1860,11 +1853,7 @@ let last_token_of_code code =

let past_of_code code =
let of_token token_ =
let open Scan.Token in
let source = match token_ with
| HmcToken ctok -> ctok |> Hmc.Scan.ConcreteToken.source
| HoccToken ctok -> ctok |> Scan.ConcreteToken.source
in
let source = Scan.Token.source token_ in
Hmc.Source.Slice.past source
in
last_token_of_code code |> of_token
Expand Down
74 changes: 16 additions & 58 deletions bootstrap/bin/hocc/code.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ let line_context_indentation line_context =
raw_indentation - (raw_indentation % 4L)

let indentation_of_hocc = function
| Scan.Token.HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.line_context source |> line_context_indentation
| Scan.Token.Tok_hocc {source} -> Hmc.Source.Slice.line_context source |> line_context_indentation
| _ -> not_reached ()

let macro_of_line line =
let open String.C in
Expand Down Expand Up @@ -476,14 +476,8 @@ let generate_hmi conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec =
|> (fun formatter ->
match prelude with
| Parse.Matter {token_; _} -> begin
let base = match token_ with
| HmcToken {source; _} -> Hmc.Source.Slice.base source
| HoccToken _ -> not_reached ()
in
let past = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.base source
in
let base = Scan.Token.source token_ |> Hmc.Source.Slice.base in
let past = Scan.Token.source hocc_ |> Hmc.Source.Slice.base in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source)
end
Expand All @@ -494,14 +488,8 @@ let generate_hmi conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec =
|> (fun formatter ->
match postlude with
| Parse.Matter _ -> begin
let base = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.past source
in
let past = match eoi with
| HmcToken {source; _} -> Hmc.Source.Slice.past source
| HoccToken _ -> not_reached ()
in
let base = Scan.Token.source hocc_ |> Hmc.Source.Slice.past in
let past = Scan.Token.source eoi |> Hmc.Source.Slice.past in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter
|> fmt_source_directive indentation source
Expand Down Expand Up @@ -1853,14 +1841,8 @@ let generate_hm conf
|> (fun formatter ->
match prelude with
| Parse.Matter {token_; _} -> begin
let base = match token_ with
| HmcToken {source; _} -> Hmc.Source.Slice.base source
| HoccToken _ -> not_reached ()
in
let past = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.base source
in
let base = Scan.Token.source token_ |> Hmc.Source.Slice.base in
let past = Scan.Token.source hocc_ |> Hmc.Source.Slice.base in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source)
end
Expand All @@ -1872,10 +1854,7 @@ let generate_hm conf
match postlude with
| Parse.Matter _ -> begin
let base = Parse.postlude_base_of_hocc hocc_block in
let past = match eoi with
| HmcToken {source; _} -> Hmc.Source.Slice.past source
| HoccToken _ -> not_reached ()
in
let past = Scan.Token.source eoi |> Hmc.Source.Slice.past in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter
|> fmt_source_directive indentation source
Expand Down Expand Up @@ -2299,14 +2278,8 @@ let generate_mli conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec =
|> (fun formatter ->
match prelude with
| Parse.Matter {token_; _} -> begin
let base = match token_ with
| HmcToken {source; _} -> Hmc.Source.Slice.base source
| HoccToken _ -> not_reached ()
in
let past = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.base source
in
let base = Scan.Token.source token_ |> Hmc.Source.Slice.base in
let past = Scan.Token.source hocc_ |> Hmc.Source.Slice.base in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source)
end
Expand All @@ -2316,14 +2289,8 @@ let generate_mli conf Parse.(Hmhi {prelude; hocc_; postlude; eoi}) io spec =
|> (fun formatter ->
match postlude with
| Parse.Matter _ -> begin
let base = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.past source
in
let past = match eoi with
| HmcToken {source; _} -> Hmc.Source.Slice.past source
| HoccToken _ -> not_reached ()
in
let base = Scan.Token.source hocc_ |> Hmc.Source.Slice.past in
let past = Scan.Token.source eoi |> Hmc.Source.Slice.past in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter
|> fmt_ml_source_directive source
Expand Down Expand Up @@ -3712,14 +3679,8 @@ let generate_ml conf
|> (fun formatter ->
match prelude with
| Parse.Matter {token_; _} -> begin
let base = match token_ with
| HmcToken {source; _} -> Hmc.Source.Slice.base source
| HoccToken _ -> not_reached ()
in
let past = match hocc_ with
| HmcToken _ -> not_reached ()
| HoccToken {source; _} -> Hmc.Source.Slice.base source
in
let base = Scan.Token.source token_ |> Hmc.Source.Slice.base in
let past = Scan.Token.source hocc_ |> Hmc.Source.Slice.base in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter |> Fmt.fmt (Hmc.Source.Slice.to_string source)
end
Expand All @@ -3730,10 +3691,7 @@ let generate_ml conf
match postlude with
| Parse.Matter _ -> begin
let base = Parse.postlude_base_of_hocc hocc_block in
let past = match eoi with
| HmcToken {source; _} -> Hmc.Source.Slice.past source
| HoccToken _ -> not_reached ()
in
let past = Scan.Token.source eoi |> Hmc.Source.Slice.past in
let source = Hmc.Source.Slice.of_cursors ~base ~past in
formatter
|> fmt_ml_source_directive source
Expand Down
Loading

0 comments on commit 41c0082

Please sign in to comment.