From 78c2db7260913d9b50ae64681d08097b70d3aa96 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 11 Sep 2024 01:08:50 +0900 Subject: [PATCH] proper termination, take 2 This PR is an alternative to JuliaDebug/LoweredCodeUtils.jl#99. This is built on top of JuliaDebug/LoweredCodeUtils.jl#116. With this PR, the following test cases now pass correctly: ```julia # Final block is not a `return`: Need to use `config::SelectiveEvalRecurse` explicitly ex = quote x = 1 yy = 7 @label loop x += 1 x < 5 || return yy @goto loop end frame = Frame(ModSelective, ex) src = frame.framecode.src edges = CodeEdges(ModSelective, src) config = SelectiveEvalRecurse() isrequired = lines_required(GlobalRef(ModSelective, :x), src, edges, config) selective_eval_fromstart!(config, frame, isrequired, true) @test ModSelective.x == 5 @test !isdefined(ModSelective, :yy) ``` The basic approach is overloading `JuliaInterpreter.step_expr!` and `LoweredCodeUtils.next_or_nothing!` for the new `SelectiveEvalController` type, as described below, to perform correct selective execution. When `SelectiveEvalController` is passed as the `recurse` argument of `selective_eval!`, the selective execution is adjusted as follows: - **Implicit return**: In Julia's IR representation (`CodeInfo`), the final block does not necessarily return and may `goto` another block. And if the `return` statement is not included in the slice in such cases, it is necessary to terminate `selective_eval!` when execution reaches such implicit return statements. `controller.implicit_returns` records the PCs of such return statements, and `selective_eval!` will return when reaching those statements. This is the core part of the fix for the test cases in JuliaDebug/LoweredCodeUtils.jl#99. - **CFG short-cut**: When the successors of a conditional branch are inactive, and it is safe to move the program counter from the conditional branch to the nearest common post-dominator of those successors, this short-cut is taken. This short-cut is not merely an optimization but is actually essential for the correctness of the selective execution. This is because, in `CodeInfo`, even if we simply fall-through dead blocks (i.e., increment the program counter without executing the statements of those blocks), it does not necessarily lead to the nearest common post-dominator block. And now [`lines_required`](@ref) or [`lines_required!`](@ref) will update the `SelectiveEvalController` passed as their argument to be appropriate for the program slice generated. One thing to note is that currently, the `controller` is not be recursed. That said, in Revise, which is the main consumer of LCU, there is no need for recursive selective execution, and so `selective_eval!` does not provide a system for inter-procedural selective evaluation. Accordingly `SelectiveEvalController` does not recurse too, but this can be left as a future extension. --- docs/src/api.md | 1 + src/LoweredCodeUtils.jl | 7 +- src/codeedges.jl | 149 +++++++++++++++++++++++++++++++++++----- src/packagedef.jl | 3 +- src/signatures.jl | 13 +--- test/codeedges.jl | 20 +++++- test/signatures.jl | 3 +- 7 files changed, 161 insertions(+), 35 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 1d434f3..f46f7be 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -17,6 +17,7 @@ lines_required lines_required! selective_eval! selective_eval_fromstart! +SelectiveEvalController ``` ## Internal utilities diff --git a/src/LoweredCodeUtils.jl b/src/LoweredCodeUtils.jl index bdeb7de..0bee58e 100644 --- a/src/LoweredCodeUtils.jl +++ b/src/LoweredCodeUtils.jl @@ -11,9 +11,10 @@ module LoweredCodeUtils using JuliaInterpreter using JuliaInterpreter: SSAValue, SlotNumber, Frame -using JuliaInterpreter: @lookup, moduleof, pc_expr, step_expr!, is_global_ref, is_quotenode_egal, whichtt, - next_until!, finish_and_return!, get_return, nstatements, codelocation, linetable, - is_return, lookup_return +using JuliaInterpreter: @lookup, moduleof, pc_expr, is_global_ref, is_quotenode_egal, + whichtt, next_until!, finish_and_return!, nstatements, codelocation, + linetable, is_return, lookup_return +import JuliaInterpreter: step_expr! include("packagedef.jl") diff --git a/src/codeedges.jl b/src/codeedges.jl index 8a1ec0b..323a0a2 100644 --- a/src/codeedges.jl +++ b/src/codeedges.jl @@ -173,6 +173,41 @@ function postprint_linelinks(io::IO, idx::Int, src::CodeInfo, cl::CodeLinks, bbc return nothing end +struct CFGShortCut + from::Int # pc of GotoIfNot with inactive 𝑰𝑵𝑭𝑳 blocks + to::Int # pc of the entry of the nearest common post-dominator of the GotoIfNot's successors +end + +""" + controller::SelectiveEvalController + +When this object is passed as the `recurse` argument of `selective_eval!`, +the selective execution is adjusted as follows: + +- **Implicit return**: In Julia's IR representation (`CodeInfo`), the final block does not + necessarily return and may `goto` another block. And if the `return` statement is not + included in the slice in such cases, it is necessary to terminate `selective_eval!` when + execution reaches such implicit return statements. `controller.implicit_returns` records + the PCs of such return statements, and `selective_eval!` will return when reaching those statements. + +- **CFG short-cut**: When the successors of a conditional branch are inactive, and it is + safe to move the program counter from the conditional branch to the nearest common + post-dominator of those successors, this short-cut is taken. + This short-cut is not merely an optimization but is actually essential for the correctness + of the selective execution. This is because, in `CodeInfo`, even if we simply fall-through + dead blocks (i.e., increment the program counter without executing the statements of those + blocks), it does not necessarily lead to the nearest common post-dominator block. + +These adjustments are necessary for performing selective execution correctly. +[`lines_required`](@ref) or [`lines_required!`](@ref) will update the `SelectiveEvalController` +passed as an argument to be appropriate for the program slice generated. +""" +struct SelectiveEvalController{RC} + inner::RC # N.B. this doesn't support recursive selective evaluation + implicit_returns::BitSet # pc where selective execution should terminate even if they're inactive + shortcuts::Vector{CFGShortCut} + SelectiveEvalController(inner::RC=finish_and_return!) where RC = new{RC}(inner, BitSet(), CFGShortCut[]) +end function namedkeys(cl::CodeLinks) ukeys = Set{GlobalRef}() @@ -566,8 +601,8 @@ end """ - isrequired = lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges) - isrequired = lines_required(idx::Int, src::CodeInfo, edges::CodeEdges) + isrequired = lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges, [controller::SelectiveEvalController]) + isrequired = lines_required(idx::Int, src::CodeInfo, edges::CodeEdges, [controller::SelectiveEvalController]) Determine which lines might need to be executed to evaluate `obj` or the statement indexed by `idx`. If `isrequired[i]` is `false`, the `i`th statement is *not* required. @@ -576,21 +611,26 @@ will end up skipping a subset of such statements, perhaps while repeating others See also [`lines_required!`](@ref) and [`selective_eval!`](@ref). """ -function lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required(obj::GlobalRef, src::CodeInfo, edges::CodeEdges, + controller::SelectiveEvalController=SelectiveEvalController(); + kwargs...) isrequired = falses(length(edges.preds)) objs = Set{GlobalRef}([obj]) - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, controller; kwargs...) end -function lines_required(idx::Int, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required(idx::Int, src::CodeInfo, edges::CodeEdges, + controller::SelectiveEvalController=SelectiveEvalController(); + kwargs...) isrequired = falses(length(edges.preds)) isrequired[idx] = true objs = Set{GlobalRef}() - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, controller; kwargs...) end """ - lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges; + lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges, + [controller::SelectiveEvalController]; norequire = ()) Like `lines_required`, but where `isrequired[idx]` has already been set to `true` for all statements @@ -602,9 +642,11 @@ should _not_ be marked as a requirement. For example, use `norequire = LoweredCodeUtils.exclude_named_typedefs(src, edges)` if you're extracting method signatures and not evaluating new definitions. """ -function lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges; kwargs...) +function lines_required!(isrequired::AbstractVector{Bool}, src::CodeInfo, edges::CodeEdges, + controller::SelectiveEvalController=SelectiveEvalController(); + kwargs...) objs = Set{GlobalRef}() - return lines_required!(isrequired, objs, src, edges; kwargs...) + return lines_required!(isrequired, objs, src, edges, controller; kwargs...) end function exclude_named_typedefs(src::CodeInfo, edges::CodeEdges) @@ -624,7 +666,9 @@ function exclude_named_typedefs(src::CodeInfo, edges::CodeEdges) return norequire end -function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, edges::CodeEdges; norequire = ()) +function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, edges::CodeEdges, + controller::SelectiveEvalController=SelectiveEvalController(); + norequire = ()) # Mark any requested objects (their lines of assignment) objs = add_requests!(isrequired, objs, edges, norequire) @@ -659,7 +703,10 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, end # now mark the active goto nodes - add_active_gotos!(isrequired, src, cfg, postdomtree) + add_active_gotos!(isrequired, src, cfg, postdomtree, controller) + + # check if there are any implicit return blocks + record_implcit_return!(controller, isrequired, cfg) return isrequired end @@ -738,13 +785,14 @@ using Core.Compiler: CFG, BasicBlock, compute_basic_blocks # The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active # block in the blocks reachable from a conditional branch up to its successors' nearest # common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow -# that conditional branch and execute the code. Otherwise, execution can be short-circuited +# that conditional branch and execute the code. Otherwise, execution can be short-cut # from the conditional branch to the nearest common post-dominator. # -# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`), -# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path -# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, -# a more careful implementation is required for this aspect. +# It is important to note that in Julia's intermediate code representation (`CodeInfo`), +# "short-cutting" a specific code region is not a simple task. Simply incrementing the +# program counter without executing the statements of 𝑰𝑵𝑭𝑳 blocks does not guarantee that +# the program counter fall-throughs to the post-dominator. +# To handle such cases, `selective_eval!` needs to use `SelectiveEvalController`. # # [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) @@ -819,8 +867,8 @@ function reachable_blocks(cfg, from_bb::Int, to_bb::Int) return visited end -function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) - dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree) +function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree, controller::SelectiveEvalController) + dead_blocks = compute_dead_blocks!(isrequired, src, cfg, postdomtree, controller) changed = false for bbidx = 1:length(cfg.blocks) if bbidx ∉ dead_blocks @@ -838,7 +886,7 @@ function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) end # find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired` -function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) +function compute_dead_blocks!(isrequired, src::CodeInfo, cfg::CFG, postdomtree, controller::SelectiveEvalController) dead_blocks = BitSet() for bbidx = 1:length(cfg.blocks) bb = cfg.blocks[bbidx] @@ -859,6 +907,11 @@ function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) end if !is_𝑰𝑵𝑭𝑳_active union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator)) + if postdominator ≠ 0 + postdominator_bb = cfg.blocks[postdominator] + postdominator_entryidx = postdominator_bb.stmts[begin] + push!(controller.shortcuts, CFGShortCut(termidx, postdominator_entryidx)) + end end end end @@ -866,6 +919,19 @@ function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) return dead_blocks end +function record_implcit_return!(controller::SelectiveEvalController, isrequired, cfg::CFG) + for bbidx = 1:length(cfg.blocks) + bb = cfg.blocks[bbidx] + if isempty(bb.succs) + i = findfirst(idx::Int->!isrequired[idx], bb.stmts) + if !isnothing(i) + push!(controller.implicit_returns, bb.stmts[i]) + end + end + end + nothing +end + # Do a traveral of "numbered" predecessors and find statement ranges and names of type definitions function find_typedefs(src::CodeInfo) typedef_blocks, typedef_names = UnitRange{Int}[], Symbol[] @@ -988,6 +1054,42 @@ function add_inplace!(isrequired, src, edges, norequire) return changed end +function JuliaInterpreter.step_expr!(controller::SelectiveEvalController, frame::Frame, @nospecialize(node), istoplevel::Bool) + if frame.pc in controller.implicit_returns + return nothing + elseif node isa GotoIfNot + for shortcut in controller.shortcuts + if shortcut.from == frame.pc + return frame.pc = shortcut.to + end + end + end + # TODO allow recursion: @invoke JuliaInterpreter.step_expr!(controller::Any, frame::Frame, node::Any, istoplevel::Bool) + JuliaInterpreter.step_expr!(controller.inner, frame, node, istoplevel) +end + +next_or_nothing!(frame::Frame) = next_or_nothing!(finish_and_return!, frame) +function next_or_nothing!(@nospecialize(recurse), frame::Frame) + pc = frame.pc + if pc < nstatements(frame.framecode) + return frame.pc = pc + 1 + end + return nothing +end +function next_or_nothing!(controller::SelectiveEvalController, frame::Frame) + if frame.pc in controller.implicit_returns + return nothing + elseif pc_expr(frame) isa GotoIfNot + for shortcut in controller.shortcuts + if shortcut.from == frame.pc + return frame.pc = shortcut.to + end + end + end + # TODO allow recursion: @invoke next_or_nothing!(controller::Any, frame::Frame) + next_or_nothing!(controller.inner, frame) +end + """ selective_eval!([recurse], frame::Frame, isrequired::AbstractVector{Bool}, istoplevel=false) @@ -1000,6 +1102,15 @@ See [`selective_eval_fromstart!`](@ref) to have that performed automatically. The default value for `recurse` is `JuliaInterpreter.finish_and_return!`. `isrequired` pertains only to `frame` itself, not any of its callees. +When `recurse::SelectiveEvalController` is specified, the selective evaluation execution +becomes fully correct. Conversely, with the default `finish_and_return!`, selective +evaluation may not be necessarily correct for all possible Julia code (see +https://github.com/JuliaDebug/LoweredCodeUtils.jl/pull/99 for more details). + +Ensure that the specified `controller` is properly synchronized with `isrequired`. +Additionally note that, at present, it is not possible to recurse the `controller`. +In other words, there is no system in place for interprocedural selective evaluation. + This will return either a `BreakpointRef`, the value obtained from the last executed statement (if stored to `frame.framedata.ssavlues`), or `nothing`. Typically, assignment to a variable binding does not result in an ssa store by JuliaInterpreter. diff --git a/src/packagedef.jl b/src/packagedef.jl index 31aac59..e99e3f6 100644 --- a/src/packagedef.jl +++ b/src/packagedef.jl @@ -12,7 +12,8 @@ const trackedheads = (:method,) const structdecls = (:_structtype, :_abstracttype, :_primitivetype) export signature, rename_framemethods!, methoddef!, methoddefs!, bodymethod -export CodeEdges, lines_required, lines_required!, selective_eval!, selective_eval_fromstart! +export CodeEdges, SelectiveEvalController, + lines_required, lines_required!, selective_eval!, selective_eval_fromstart! include("utils.jl") include("signatures.jl") diff --git a/src/signatures.jl b/src/signatures.jl index 0c399b1..b1352b8 100644 --- a/src/signatures.jl +++ b/src/signatures.jl @@ -326,6 +326,7 @@ function rename_framemethods!(@nospecialize(recurse), frame::Frame, methodinfos, set_to_running_name!(recurse, replacements, frame, methodinfos, selfcalls[idx], calledby, callee, caller) catch err @warn "skipping callee $callee (called by $caller) due to $err" + # showerror(stderr, err, stacktrace(catch_backtrace())) end end for sc in selfcalls @@ -468,16 +469,8 @@ end Advance the program counter without executing the corresponding line. If `frame` is finished, `nextpc` will be `nothing`. """ -next_or_nothing(frame, pc) = next_or_nothing(finish_and_return!, frame, pc) -next_or_nothing(@nospecialize(recurse), frame, pc) = pc < nstatements(frame.framecode) ? pc+1 : nothing -next_or_nothing!(frame) = next_or_nothing!(finish_and_return!, frame) -function next_or_nothing!(@nospecialize(recurse), frame) - pc = frame.pc - if pc < nstatements(frame.framecode) - return frame.pc = pc + 1 - end - return nothing -end +next_or_nothing(frame::Frame, pc::Int) = next_or_nothing(finish_and_return!, frame, pc) +next_or_nothing(@nospecialize(recurse), frame::Frame, pc::Int) = pc < nstatements(frame.framecode) ? pc+1 : nothing """ nextpc = skip_until(predicate, [recurse], frame, pc) diff --git a/test/codeedges.jl b/test/codeedges.jl index 51584e0..bf966f1 100644 --- a/test/codeedges.jl +++ b/test/codeedges.jl @@ -65,7 +65,7 @@ module ModSelective end # Check that the result of direct evaluation agrees with selective evaluation Core.eval(ModEval, ex) isrequired = lines_required(GlobalRef(ModSelective, :x), src, edges) - # theere is too much diversity in lowering across Julia versions to make it useful to test `sum(isrequired)` + # there is too much diversity in lowering across Julia versions to make it useful to test `sum(isrequired)` selective_eval_fromstart!(frame, isrequired) @test ModSelective.x === ModEval.x @test allmissing(ModSelective, (:y, :z, :a, :b, :k)) @@ -216,6 +216,24 @@ module ModSelective end @test ModSelective.k11 == 0 @test 3 <= ModSelective.s11 <= 15 + # Final block is not a `return`: Need to use `controller::SelectiveEvalController` explicitly + ex = quote + x = 1 + yy = 7 + @label loop + x += 1 + x < 5 || return yy + @goto loop + end + frame = Frame(ModSelective, ex) + src = frame.framecode.src + edges = CodeEdges(ModSelective, src) + controller = SelectiveEvalController() + isrequired = lines_required(GlobalRef(ModSelective, :x), src, edges, controller) + selective_eval_fromstart!(controller, frame, isrequired, true) + @test ModSelective.x == 5 + @test !isdefined(ModSelective, :yy) + # Control-flow in an abstract type definition ex = :(abstract type StructParent{T, N} <: AbstractArray{T, N} end) frame = Frame(ModSelective, ex) diff --git a/test/signatures.jl b/test/signatures.jl index 4c0e92e..b37b380 100644 --- a/test/signatures.jl +++ b/test/signatures.jl @@ -415,9 +415,10 @@ bodymethtest5(x, y=Dict(1=>2)) = 5 oldenv = Pkg.project().path try # we test with the old version of CBinding, let's do it in an isolated environment + # so we don't cause package conflicts with everything else Pkg.activate(; temp=true, io=devnull) - @info "Adding CBinding to the environment for test purposes" + @info "Adding CBinding v0.9.4 to the environment for test purposes" Pkg.add(; name="CBinding", version="0.9.4", io=devnull) # `@cstruct` isn't defined for v1.0 and above m = Module()