From cccbb32fd55bf3e2bcc1c122e2ca7ab85837ddf5 Mon Sep 17 00:00:00 2001 From: Sam Mohr Date: Sun, 22 Sep 2024 05:21:53 -0700 Subject: [PATCH] Initial work on tolerant block parsing --- crates/compiler/can/src/desugar.rs | 1 + crates/compiler/can/src/expr.rs | 11 +- crates/compiler/fmt/src/expr.rs | 2 + crates/compiler/parse/src/ast.rs | 14 ++ crates/compiler/parse/src/expr.rs | 122 ++++++++++++------ crates/compiler/parse/src/normalize.rs | 1 + crates/compiler/problem/src/can.rs | 8 +- crates/language_server/src/analysis/tokens.rs | 3 +- crates/reporting/src/error/canonicalize.rs | 22 ++++ 9 files changed, 142 insertions(+), 42 deletions(-) diff --git a/crates/compiler/can/src/desugar.rs b/crates/compiler/can/src/desugar.rs index 1e253c6134..7ea51b3e21 100644 --- a/crates/compiler/can/src/desugar.rs +++ b/crates/compiler/can/src/desugar.rs @@ -317,6 +317,7 @@ pub fn desugar_expr<'a>( | AccessorFunction(_) | Var { .. } | Underscore { .. } + | EmptyBlock(_) | MalformedIdent(_, _) | MalformedClosure | MalformedSuffixed(..) diff --git a/crates/compiler/can/src/expr.rs b/crates/compiler/can/src/expr.rs index 18f295bb0f..e897598812 100644 --- a/crates/compiler/can/src/expr.rs +++ b/crates/compiler/can/src/expr.rs @@ -1337,6 +1337,14 @@ pub fn canonicalize_expr<'a>( Output::default(), ) } + ast::Expr::EmptyBlock(parent) => { + use roc_problem::can::RuntimeError::*; + + let problem = EmptyBlock(*parent, region); + env.problem(Problem::RuntimeError(problem.clone())); + + (RuntimeError(problem), Output::default()) + } ast::Expr::MalformedClosure => { use roc_problem::can::RuntimeError::*; (RuntimeError(MalformedClosure(region)), Output::default()) @@ -2490,7 +2498,8 @@ pub fn is_valid_interpolation(expr: &ast::Expr<'_>) -> bool { | ast::Expr::MalformedIdent(_, _) | ast::Expr::Tag(_) | ast::Expr::OpaqueRef(_) - | ast::Expr::MalformedClosure => true, + | ast::Expr::MalformedClosure + | ast::Expr::EmptyBlock(_) => true, // Newlines are disallowed inside interpolation, and these all require newlines ast::Expr::DbgStmt(_, _) | ast::Expr::LowLevelDbg(_, _, _) diff --git a/crates/compiler/fmt/src/expr.rs b/crates/compiler/fmt/src/expr.rs index 46e004d7f1..e242a8f109 100644 --- a/crates/compiler/fmt/src/expr.rs +++ b/crates/compiler/fmt/src/expr.rs @@ -42,6 +42,7 @@ impl<'a> Formattable for Expr<'a> { | RecordUpdater(_) | Var { .. } | Underscore { .. } + | EmptyBlock(_) | MalformedIdent(_, _) | MalformedClosure | Tag(_) @@ -545,6 +546,7 @@ impl<'a> Formattable for Expr<'a> { buf.indent(indent); loc_expr.format_with_options(buf, parens, newlines, indent); } + EmptyBlock(_) => {} MalformedClosure => {} PrecedenceConflict { .. } => {} EmptyRecordBuilder { .. } => {} diff --git a/crates/compiler/parse/src/ast.rs b/crates/compiler/parse/src/ast.rs index b59fdb591e..1bd653e46d 100644 --- a/crates/compiler/parse/src/ast.rs +++ b/crates/compiler/parse/src/ast.rs @@ -521,6 +521,8 @@ pub enum Expr<'a> { &'a [&'a WhenBranch<'a>], ), + EmptyBlock(EmptyBlockParent), + // Blank Space (e.g. comments, spaces, newlines) before or after an expression. // We preserve this for the formatter; canonicalization ignores it. SpaceBefore(&'a Expr<'a>, &'a [CommentOrNewline<'a>]), @@ -539,6 +541,15 @@ pub enum Expr<'a> { OptionalFieldInRecordBuilder(&'a Loc<&'a str>, &'a Loc>), } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EmptyBlockParent { + IfBranch, + ElseBranch, + WhenBranch, + Closure, + Definition, +} + impl Expr<'_> { pub fn get_region_spanning_binops(&self) -> Region { match self { @@ -672,6 +683,7 @@ pub fn is_expr_suffixed(expr: &Expr) -> bool { } Expr::SpaceBefore(a, _) => is_expr_suffixed(a), Expr::SpaceAfter(a, _) => is_expr_suffixed(a), + Expr::EmptyBlock { .. } => false, Expr::MalformedIdent(_, _) => false, Expr::MalformedClosure => false, Expr::MalformedSuffixed(_) => false, @@ -1011,6 +1023,7 @@ impl<'a, 'b> RecursiveValueDefIter<'a, 'b> { | Dbg | Tag(_) | OpaqueRef(_) + | EmptyBlock { .. } | MalformedIdent(_, _) | MalformedClosure | PrecedenceConflict(_) @@ -2475,6 +2488,7 @@ impl<'a> Malformed for Expr<'a> { SpaceAfter(expr, _) | ParensAround(expr) => expr.is_malformed(), + EmptyBlock { .. } | MalformedIdent(_, _) | MalformedClosure | MalformedSuffixed(..) | diff --git a/crates/compiler/parse/src/expr.rs b/crates/compiler/parse/src/expr.rs index 20a00f3daa..b41e9788e7 100644 --- a/crates/compiler/parse/src/expr.rs +++ b/crates/compiler/parse/src/expr.rs @@ -1,8 +1,8 @@ use crate::ast::{ - is_expr_suffixed, AssignedField, Collection, CommentOrNewline, Defs, Expr, ExtractSpaces, - Implements, ImplementsAbilities, ImportAlias, ImportAsKeyword, ImportExposingKeyword, - ImportedModuleName, IngestedFileAnnotation, IngestedFileImport, ModuleImport, - ModuleImportParams, Pattern, Spaceable, Spaced, Spaces, SpacesBefore, TryTarget, + is_expr_suffixed, AssignedField, Collection, CommentOrNewline, Defs, EmptyBlockParent, Expr, + ExtractSpaces, Implements, ImplementsAbilities, ImportAlias, ImportAsKeyword, + ImportExposingKeyword, ImportedModuleName, IngestedFileAnnotation, IngestedFileImport, + ModuleImport, ModuleImportParams, Pattern, Spaceable, Spaced, Spaces, SpacesBefore, TryTarget, TypeAnnotation, TypeDef, TypeHeader, ValueDef, }; use crate::blankspace::{ @@ -15,7 +15,7 @@ use crate::ident::{ }; use crate::parser::{ self, and, backtrackable, between, byte, byte_indent, collection_inner, - collection_trailing_sep_e, either, increment_min_indent, indented_seq_skip_first, loc, map, + collection_trailing_sep_e, either, increment_min_indent, indented_seq, loc, map, map_with_arena, optional, reset_min_indent, sep_by1, sep_by1_e, set_min_indent, skip_first, skip_second, specialize_err, specialize_err_ref, then, two_bytes, zero_or_more, EClosure, EExpect, EExpr, EIf, EImport, EImportParams, EInParens, EList, ENumber, EPattern, ERecord, @@ -1746,7 +1746,8 @@ fn parse_stmt_assignment<'a>( let (value_def, state) = { match expr_to_pattern_help(arena, &call.value) { Ok(good) => { - let (_, body, state) = parse_block_inner( + let prior_state = state.clone(); + let (body, state) = match parse_block_inner( options, arena, state, @@ -1755,7 +1756,20 @@ fn parse_stmt_assignment<'a>( |a, _| a.clone(), spaces_after_operator, !spaces_after_operator.value.is_empty(), - )?; + ) { + Ok((_, body, state)) => (body, state), + Err((NoProgress, _parse_err)) => { + let empty_block_region = Region::span_across(&call.region, &loc_op.region); + let empty_body = Loc::at( + empty_block_region, + Expr::EmptyBlock(EmptyBlockParent::Definition), + ); + + (empty_body, prior_state) + } + + Err((MadeProgress, parse_err)) => return Err((MadeProgress, parse_err)), + }; let alias = ValueDef::Body(arena.alloc(Loc::at(call.region, good)), arena.alloc(body)); @@ -2172,6 +2186,7 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result(options: ExprParseOptions) -> impl Parser<'a, Expr<'a>, EClo // closure_help_help(options) map_with_arena( // After the first token, all other tokens must be indented past the start of the line - indented_seq_skip_first( + indented_seq( // All closures start with a '\' - e.g. (\x -> x + 1) - byte_indent(b'\\', EClosure::Start), + loc(byte_indent(b'\\', EClosure::Start)), // Once we see the '\', we're committed to parsing this as a closure. // It may turn out to be malformed, but it is definitely a closure. and( @@ -2318,17 +2333,28 @@ fn closure_help<'a>(options: ExprParseOptions) -> impl Parser<'a, Expr<'a>, EClo ), EClosure::Arg, ), - skip_first( + and( // Parse the -> which separates params from body - two_bytes(b'-', b'>', EClosure::Arrow), + loc(two_bytes(b'-', b'>', EClosure::Arrow)), // Parse the body - block(options, true, EClosure::IndentBody, EClosure::Body), + optional(block(options, true, EClosure::IndentBody, EClosure::Body)), ), ), ), - |arena: &'a Bump, (params, body)| { + |arena: &'a Bump, (closure_symbol, (params, (arrow, maybe_body)))| { let params: Vec<'a, Loc>> = params; let params: &'a [Loc>] = params.into_bump_slice(); + + let body = maybe_body.unwrap_or_else(|| { + let empty_closure_region = + Region::span_across(&closure_symbol.region, &arrow.region); + + Loc { + region: empty_closure_region, + value: Expr::EmptyBlock(EmptyBlockParent::Closure), + } + }); + Expr::Closure(params, arena.alloc(body)) }, ) @@ -2576,35 +2602,45 @@ mod when { } } -fn if_branch<'a>() -> impl Parser<'a, (Loc>, Loc>), EIf<'a>> { +struct IfBranch<'a> { + then_region: Region, + condition: Loc>, + body: Option>>, +} + +fn if_branch<'a>() -> impl Parser<'a, IfBranch<'a>, EIf<'a>> { let options = ExprParseOptions { accept_multi_backpassing: true, check_for_arrow: true, }; - skip_second( - and( - skip_second( - space0_around_ee( - specialize_err_ref(EIf::Condition, loc_expr(true)), - EIf::IndentCondition, - EIf::IndentThenToken, + map_with_arena( + skip_second( + and( + and( + space0_around_ee( + specialize_err_ref(EIf::Condition, loc_expr(true)), + EIf::IndentCondition, + EIf::IndentThenToken, + ), + loc(parser::keyword(keyword::THEN, EIf::Then)), ), - parser::keyword(keyword::THEN, EIf::Then), - ), - map_with_arena( - space0_after_e( + optional(space0_after_e( block(options, false, EIf::IndentThenBranch, EIf::ThenBranch), EIf::IndentElseToken, - ), - |arena: &'a Bump, block: Loc>| match block.value { - Expr::SpaceAfter(&Expr::SpaceBefore(x, before), after) => block.with_value( - Expr::SpaceBefore(arena.alloc(Expr::SpaceAfter(x, after)), before), - ), - _ => block, - }, + )), ), + parser::keyword(keyword::ELSE, EIf::Else), ), - parser::keyword(keyword::ELSE, EIf::Else), + |arena: &'a Bump, ((condition, then_keyword), maybe_block)| IfBranch { + then_region: then_keyword.region, + condition, + body: maybe_block.map(|block| match block.value { + Expr::SpaceAfter(&Expr::SpaceBefore(x, before), after) => block.with_value( + Expr::SpaceBefore(arena.alloc(Expr::SpaceAfter(x, after)), before), + ), + _ => block, + }), + }, ) } @@ -2694,8 +2730,8 @@ fn import<'a>() -> impl Parser<'a, ValueDef<'a>, EImport<'a>> { fn if_expr_help<'a>(options: ExprParseOptions) -> impl Parser<'a, Expr<'a>, EIf<'a>> { move |arena: &'a Bump, state, min_indent| { - let (_, _, state) = - parser::keyword(keyword::IF, EIf::If).parse(arena, state, min_indent)?; + let (_, if_keyword, state) = + loc(parser::keyword(keyword::IF, EIf::If)).parse(arena, state, min_indent)?; let if_indent = state.line_indent(); @@ -2704,10 +2740,19 @@ fn if_expr_help<'a>(options: ExprParseOptions) -> impl Parser<'a, Expr<'a>, EIf< let mut loop_state = state; let state_final_else = loop { - let (_, (cond, then_branch), state) = - if_branch().parse(arena, loop_state, min_indent)?; + let (_, if_branch_data, state) = if_branch().parse(arena, loop_state, min_indent)?; - branches.push((cond, then_branch)); + let then_branch = if_branch_data.body.unwrap_or_else(|| { + let missing_branch_region = + Region::span_across(&if_keyword.region, &if_branch_data.then_region); + + Loc { + region: missing_branch_region, + value: Expr::EmptyBlock(EmptyBlockParent::IfBranch), + } + }); + + branches.push((if_branch_data.condition, then_branch)); // try to parse another `if` // NOTE this drops spaces between the `else` and the `if` @@ -2965,6 +3010,7 @@ fn parse_stmt_seq<'a, E: SpaceProblem + 'a>( } }; } + Ok((MadeProgress, stmts, state)) } diff --git a/crates/compiler/parse/src/normalize.rs b/crates/compiler/parse/src/normalize.rs index a1def6c677..6e1872f33f 100644 --- a/crates/compiler/parse/src/normalize.rs +++ b/crates/compiler/parse/src/normalize.rs @@ -785,6 +785,7 @@ impl<'a> Normalize<'a> for Expr<'a> { // The formatter can remove redundant parentheses, so also remove these when normalizing for comparison. a.normalize(arena) } + Expr::EmptyBlock(parent) => Expr::EmptyBlock(parent), Expr::MalformedIdent(a, b) => Expr::MalformedIdent(a, remove_spaces_bad_ident(b)), Expr::MalformedClosure => Expr::MalformedClosure, Expr::MalformedSuffixed(a) => Expr::MalformedSuffixed(a), diff --git a/crates/compiler/problem/src/can.rs b/crates/compiler/problem/src/can.rs index 31af6a39aa..54694ca013 100644 --- a/crates/compiler/problem/src/can.rs +++ b/crates/compiler/problem/src/can.rs @@ -5,7 +5,7 @@ use roc_collections::all::MutSet; use roc_module::called_via::BinOp; use roc_module::ident::{Ident, Lowercase, ModuleName, TagName}; use roc_module::symbol::{ModuleId, Symbol}; -use roc_parse::ast::Base; +use roc_parse::ast::{Base, EmptyBlockParent}; use roc_parse::pattern::PatternType; use roc_region::all::{Loc, Region}; use roc_types::types::AliasKind; @@ -434,6 +434,7 @@ impl Problem { field: region, }) | Problem::RuntimeError(RuntimeError::ReadIngestedFileError { region, .. }) + | Problem::RuntimeError(RuntimeError::EmptyBlock(_, region)) | Problem::InvalidAliasRigid { region, .. } | Problem::InvalidInterpolation(region) | Problem::InvalidHexadecimal(region) @@ -662,6 +663,8 @@ pub enum RuntimeError { NonExhaustivePattern, + EmptyBlock(EmptyBlockParent, Region), + InvalidInterpolation(Region), InvalidHexadecimal(Region), InvalidUnicodeCodePt(Region), @@ -740,7 +743,8 @@ impl RuntimeError { record: _, field: region, } - | RuntimeError::ReadIngestedFileError { region, .. } => *region, + | RuntimeError::ReadIngestedFileError { region, .. } + | RuntimeError::EmptyBlock(_, region) => *region, RuntimeError::InvalidUnicodeCodePt(region) => *region, RuntimeError::UnresolvedTypeVar | RuntimeError::ErroneousType => Region::zero(), RuntimeError::LookupNotInScope { loc_name, .. } => loc_name.region, diff --git a/crates/language_server/src/analysis/tokens.rs b/crates/language_server/src/analysis/tokens.rs index 10918316ec..0155ecc58a 100644 --- a/crates/language_server/src/analysis/tokens.rs +++ b/crates/language_server/src/analysis/tokens.rs @@ -725,7 +725,8 @@ impl IterTokens for Loc> { Expr::EmptyRecordBuilder(e) => e.iter_tokens(arena), Expr::SingleFieldRecordBuilder(e) => e.iter_tokens(arena), Expr::OptionalFieldInRecordBuilder(_name, e) => e.iter_tokens(arena), - Expr::MalformedIdent(_, _) + Expr::EmptyBlock(_) + | Expr::MalformedIdent(_, _) | Expr::MalformedClosure | Expr::PrecedenceConflict(_) | Expr::MalformedSuffixed(_) => { diff --git a/crates/reporting/src/error/canonicalize.rs b/crates/reporting/src/error/canonicalize.rs index 7e60ea0729..6a39cb0470 100644 --- a/crates/reporting/src/error/canonicalize.rs +++ b/crates/reporting/src/error/canonicalize.rs @@ -2,6 +2,7 @@ use roc_collections::all::MutSet; use roc_module::called_via::Suffix; use roc_module::ident::{Ident, Lowercase, ModuleName}; use roc_module::symbol::DERIVABLE_ABILITIES; +use roc_parse::ast::EmptyBlockParent; use roc_problem::can::PrecedenceProblem::BothNonAssociative; use roc_problem::can::{ BadPattern, CycleEntry, ExtensionTypeKind, FloatErrorKind, IntErrorKind, Problem, RuntimeError, @@ -2081,6 +2082,27 @@ fn pretty_runtime_error<'b>( RuntimeError::MalformedSuffixed(_) => { todo!("error for malformed suffix"); } + RuntimeError::EmptyBlock(parent, region) => { + let parent_name = match parent { + EmptyBlockParent::IfBranch => "if branch", + EmptyBlockParent::ElseBranch => "else branch", + EmptyBlockParent::WhenBranch => "when branch", + EmptyBlockParent::Closure => "function", + EmptyBlockParent::Definition => "definition", + }; + + doc = alloc.stack([ + alloc.concat([ + alloc.reflow("This "), + alloc.text(parent_name), + alloc.reflow(" has no body:"), + ]), + alloc.region(lines.convert_region(region), severity), + alloc.text("I would need to crash if I used it!"), + ]); + + title = "MISSING BODY"; + } RuntimeError::InvalidFloat(sign @ FloatErrorKind::PositiveInfinity, region, _raw_str) | RuntimeError::InvalidFloat(sign @ FloatErrorKind::NegativeInfinity, region, _raw_str) => { let tip = alloc