From ffba552d9ae02d89923f932cc7b155eb90cb6b60 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 5 Apr 2023 20:09:21 +0900 Subject: [PATCH] remove `throw` block deoptimization completely After experimenting with #49235, I started to question if we are getting any actual benefit from the `throw` block deoptimization anymore. This commit removes the deoptimization from the system entirely. Based on the numbers below, it appears that the deoptimization is not very profitable in our current Julia-level compilation pipeline, with the effects analysis playing a significant role in reducing latency. Here are the updated benchmark: | Metric | master | #49235 | this commit | |-------------------------|-----------|-------------|--------------------------------------------| | Base (seconds) | 15.579300 | 15.206645 | 15.42059 | | Stdlibs (seconds) | 17.919013 | 17.667094 | 17.404586 | | Total (seconds) | 33.499279 | 32.874737 | 32.826162 | | Precompilation (seconds) | 53.488528 | 53.152028 | 53.152028 | | First time `plot(rand(10,3))` [^1] | `3.432983 seconds (16.55 M allocations)` | `3.477767 seconds (16.45 M allocations)` | `3.539117 seconds (16.43 M allocations)` | | First time `solve(prob, QNDF())(5.0)` [^2] | `4.628278 seconds (15.74 M allocations)` | `4.609222 seconds (15.32 M allocations)` | `4.547323 seconds (15.19 M allocations: 823.510 MiB)` | [^1]: `using Plots; plot(rand(10,3))` [^2]: `using DifferentialEquations; solve(prob, QNDF())(5.0)` --- base/compiler/abstractinterpretation.jl | 22 ------------ base/compiler/inferencestate.jl | 23 ------------- base/compiler/optimize.jl | 12 +++---- base/compiler/types.jl | 21 ------------ base/compiler/utilities.jl | 45 ------------------------- 5 files changed, 5 insertions(+), 118 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index e110482ff9ac5d..4c7d0a9d22c808 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -34,28 +34,6 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype), sv::AbsIntState, max_methods::Int) ⊑ₚ = ⊑(ipo_lattice(interp)) - if !should_infer_this_call(interp, sv) - add_remark!(interp, sv, "Skipped call in throw block") - nonoverlayed = false - if isoverlayed(method_table(interp)) && is_nonoverlayed(sv.ipo_effects) - # as we may want to concrete-evaluate this frame in cases when there are - # no overlayed calls, try an additional effort now to check if this call - # isn't overlayed rather than just handling it conservatively - matches = find_matching_methods(typeinf_lattice(interp), arginfo.argtypes, atype, method_table(interp), - InferenceParams(interp).max_union_splitting, max_methods) - if !isa(matches, FailedMethodMatch) - nonoverlayed = matches.nonoverlayed - end - else - nonoverlayed = true - end - # At this point we are guaranteed to end up throwing on this path, - # which is all that's required for :consistent-cy. Of course, we don't - # know anything else about this statement. - effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed) - return CallMeta(Any, effects, NoCallInfo()) - end - argtypes = arginfo.argtypes matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp), InferenceParams(interp).max_union_splitting, max_methods) diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index 8aff8910f457f6..ae330d52fa23ac 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -298,7 +298,6 @@ mutable struct InferenceState cached = cache === :global # some more setups - InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at) cache !== :no && push!(get_inference_cache(interp), result) return new( @@ -840,27 +839,5 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = state.rt === Any -function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState) - if InferenceParams(interp).unoptimize_throw_blocks - # Disable inference of calls in throw blocks, since we're unlikely to - # need their types. There is one exception however: If up until now, the - # function has not seen any side effects, we would like to make sure there - # aren't any in the throw block either to enable other optimizations. - if is_stmt_throw_block(get_curr_ssaflag(sv)) - should_infer_for_effects(sv) || return false - end - end - return true -end -function should_infer_for_effects(sv::InferenceState) - effects = sv.ipo_effects - effects.consistent === ALWAYS_FALSE && return false - effects.effect_free === ALWAYS_FALSE && return false - effects.terminates || return false - effects.nonoverlayed || return false - return true -end -should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true - add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 84817b1bf65319..2966b31ca55a3f 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -229,7 +229,6 @@ _topmod(sv::OptimizationState) = _topmod(sv.mod) is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE ≠ 0 is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE ≠ 0 -is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0 """ stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) -> @@ -654,7 +653,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y) isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T)) function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, - union_penalties::Bool, params::OptimizationParams, error_path::Bool = false) + union_penalties::Bool, params::OptimizationParams) head = ex.head if is_meta_expr_head(head) return 0 @@ -689,7 +688,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp return 0 elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3 atyp = argextype(ex.args[3], src, sptypes) - return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty + return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes))) return 1 elseif f === Core.isa @@ -712,7 +711,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp if extyp === Union{} return 0 end - return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty + return params.inline_nonleaf_penalty elseif head === :foreigncall || head === :invoke || head === :invoke_modify # Calls whose "return type" is Union{} do not actually return: # they are errors. Since these are not part of the typical @@ -729,7 +728,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp end a = ex.args[2] if a isa Expr - cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params, error_path)) + cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params)) end return cost elseif head === :copyast @@ -749,8 +748,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod thiscost = 0 dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt if stmt isa Expr - thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params, - is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int + thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params)::Int elseif stmt isa GotoNode # loops are generally always expensive # but assume that forward jumps are already counted for from diff --git a/base/compiler/types.jl b/base/compiler/types.jl index c987e03df52616..364e121246c5d4 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -130,11 +130,6 @@ Parameters that control abstract interpretation-based type inference operation. information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a more fine-grained control on this configuration with per-method annotation basis. --- -- `inf_params.unoptimize_throw_blocks::Bool = true`\\ - If `true`, skips inferring calls that are in a block that is known to `throw`. - It may improve the compiler latency without sacrificing the runtime performance - in common situations. ---- - `inf_params.assume_bindings_static::Bool = false`\\ If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at inference time can be assumed to always not exist at runtime (and thus e.g. any access to @@ -150,7 +145,6 @@ struct InferenceParams tuple_complexity_limit_depth::Int ipo_constant_propagation::Bool aggressive_constant_propagation::Bool - unoptimize_throw_blocks::Bool assume_bindings_static::Bool ignore_recursion_hardlimit::Bool @@ -162,7 +156,6 @@ struct InferenceParams tuple_complexity_limit_depth::Int, ipo_constant_propagation::Bool, aggressive_constant_propagation::Bool, - unoptimize_throw_blocks::Bool, assume_bindings_static::Bool, ignore_recursion_hardlimit::Bool) return new( @@ -173,7 +166,6 @@ struct InferenceParams tuple_complexity_limit_depth, ipo_constant_propagation, aggressive_constant_propagation, - unoptimize_throw_blocks, assume_bindings_static, ignore_recursion_hardlimit) end @@ -187,7 +179,6 @@ function InferenceParams( #=tuple_complexity_limit_depth::Int=# 3, #=ipo_constant_propagation::Bool=# true, #=aggressive_constant_propagation::Bool=# false, - #=unoptimize_throw_blocks::Bool=# true, #=assume_bindings_static::Bool=# false, #=ignore_recursion_hardlimit::Bool=# false); max_methods::Int = params.max_methods, @@ -197,7 +188,6 @@ function InferenceParams( tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth, ipo_constant_propagation::Bool = params.ipo_constant_propagation, aggressive_constant_propagation::Bool = params.aggressive_constant_propagation, - unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks, assume_bindings_static::Bool = params.assume_bindings_static, ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit) return InferenceParams( @@ -208,7 +198,6 @@ function InferenceParams( tuple_complexity_limit_depth, ipo_constant_propagation, aggressive_constant_propagation, - unoptimize_throw_blocks, assume_bindings_static, ignore_recursion_hardlimit) end @@ -233,10 +222,6 @@ Parameters that control optimizer operation. tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will be added to `opt_params.inline_cost_threshold` when making inlining decision. --- -- `opt_params.inline_error_path_cost::Int = 20`\\ - Specifies the penalty cost for an un-optimized dynamic call in a block that is known to - `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams). ---- - `opt_params.max_tuple_splat::Int = 32`\\ When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple contains more than this many elements. @@ -262,7 +247,6 @@ struct OptimizationParams inline_cost_threshold::Int inline_nonleaf_penalty::Int inline_tupleret_bonus::Int - inline_error_path_cost::Int max_tuple_splat::Int compilesig_invokes::Bool trust_inference::Bool @@ -273,7 +257,6 @@ struct OptimizationParams inline_cost_threshold::Int, inline_nonleaf_penalty::Int, inline_tupleret_bonus::Int, - inline_error_path_cost::Int, max_tuple_splat::Int, compilesig_invokes::Bool, trust_inference::Bool, @@ -283,7 +266,6 @@ struct OptimizationParams inline_cost_threshold, inline_nonleaf_penalty, inline_tupleret_bonus, - inline_error_path_cost, max_tuple_splat, compilesig_invokes, trust_inference, @@ -296,7 +278,6 @@ function OptimizationParams( #=inline_cost_threshold::Int=# 100, #=inline_nonleaf_penalty::Int=# 1000, #=inline_tupleret_bonus::Int=# 250, - #=inline_error_path_cost::Int=# 20, #=max_tuple_splat::Int=# 32, #=compilesig_invokes::Bool=# true, #=trust_inference::Bool=# false, @@ -305,7 +286,6 @@ function OptimizationParams( inline_cost_threshold::Int = params.inline_cost_threshold, inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty, inline_tupleret_bonus::Int = params.inline_tupleret_bonus, - inline_error_path_cost::Int = params.inline_error_path_cost, max_tuple_splat::Int = params.max_tuple_splat, compilesig_invokes::Bool = params.compilesig_invokes, trust_inference::Bool = params.trust_inference, @@ -315,7 +295,6 @@ function OptimizationParams( inline_cost_threshold, inline_nonleaf_penalty, inline_tupleret_bonus, - inline_error_path_cost, max_tuple_splat, compilesig_invokes, trust_inference, diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index bae8ef5bae2421..7216c17641b595 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -436,51 +436,6 @@ function is_throw_call(e::Expr) return false end -function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int}) - for stmt in find_throw_blocks(src.code, handler_at) - src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK - end - return nothing -end - -function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int}) - stmts = BitSet() - n = length(code) - for i in n:-1:1 - s = code[i] - if isa(s, Expr) - if s.head === :gotoifnot - if i+1 in stmts && s.args[2]::Int in stmts - push!(stmts, i) - end - elseif s.head === :return - # see `ReturnNode` handling - elseif is_throw_call(s) - if handler_at[i] == 0 - push!(stmts, i) - end - elseif i+1 in stmts - push!(stmts, i) - end - elseif isa(s, ReturnNode) - # NOTE: it potentially makes sense to treat unreachable nodes - # (where !isdefined(s, :val)) as `throw` points, but that can cause - # worse codegen around the call site (issue #37558) - elseif isa(s, GotoNode) - if s.label in stmts - push!(stmts, i) - end - elseif isa(s, GotoIfNot) - if i+1 in stmts && s.dest in stmts - push!(stmts, i) - end - elseif i+1 in stmts - push!(stmts, i) - end - end - return stmts -end - # using a function to ensure we can infer this @inline function slot_id(s) isa(s, SlotNumber) && return s.id