Skip to content

Commit

Permalink
Start working on version with multiple inlinings
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcere committed Nov 8, 2024
1 parent 48f3244 commit 0f24141
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 37 deletions.
12 changes: 6 additions & 6 deletions src/cfg_methods/cfg_block_actions/inline_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class InlineFunction(BlockAction):
"""

def __init__(self, instr_position: int, cfg_block: CFGBlock, cfg_blocklist: CFGBlockList,
function_name: str, cfg_object: CFGObject):
cfg_function: CFGFunction):
"""
It receives the position in which we want to split, the corresponding block in which we are appending
the corresponding block list, its block list, the function name and the block list
Expand All @@ -27,10 +27,9 @@ def __init__(self, instr_position: int, cfg_block: CFGBlock, cfg_blocklist: CFGB
self._instr_position: int = instr_position
self._cfg_block: CFGBlock = cfg_block
self._cfg_blocklist: CFGBlockList = cfg_blocklist
self._function_name: function_name_T = function_name
self._cfg_function: CFGFunction = cfg_object.functions[function_name]
self._function_name: function_name_T = cfg_function.name
self._cfg_function: CFGFunction = cfg_function
self._function_blocklist: CFGBlockList = self._cfg_function.blocks
self._cfg_object: CFGObject = cfg_object
self._first_sub_block: Optional[CFGObject] = None
self._second_sub_block: Optional[CFGObject] = None

Expand All @@ -40,7 +39,9 @@ def perform_action(self):
# First we need to split the block in the function call, which is given by the instr position.
# As a final check, we ensure the instruction in that position corresponds to the function name passed as
# an argument
assert call_instruction.get_op_name() == self._function_name, \
# Considering we might have duplicated the function multiple times, we just check that the original call matches
# the start of the function name
assert self._function_name.startswith(call_instruction.get_op_name()), \
f"Expected function call {self._function_name} in position {self._instr_position} but got instead" \
f"{self._cfg_block.get_instructions()}"

Expand Down Expand Up @@ -133,7 +134,6 @@ def perform_action(self):
self._function_blocklist.blocks.clear()
del self._function_blocklist
del self._cfg_function
self._cfg_object.functions.pop(self._function_name)

@property
def first_sub_block(self) -> Optional[CFGBlock]:
Expand Down
97 changes: 97 additions & 0 deletions src/cfg_methods/cost_computation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
Module to estimate the gas and size (in bytes) costs of the functions in a CFG. Useful for determining whether a
function must be inlined or not
"""
from typing import Dict, Tuple, Set
from global_params.types import function_name_T, var_id_T, component_name_T
from parser.cfg import CFG
from parser.cfg_function import CFGFunction
from parser.cfg_block_list import CFGBlockList
from parser.cfg_block import CFGBlock
from parser.utils_parser import get_push_number_hex

# Type of the costs we are interested in studying: gas and size costs
costs_T = Tuple[int, int]

# Type of the dict that maps each function name to an estimation on its gas and size costs
function2costs_T = Dict[function_name_T, costs_T]


def compute_gas_bytes(cfg: CFG) -> Dict[component_name_T, function2costs_T]:
    """
    Estimates the gas and size costs of all the functions inside the CFG structure.

    The result maps every object id in cfg.objectCFG to a dict with the estimated (gas, size) costs of each
    of its functions. The entries computed for the sub-object returned by cfg.get_subobject() (if any) are
    merged into the same dict.
    """
    function2costs = dict()
    for object_id, cfg_object in cfg.objectCFG.items():
        current_object2costs = dict()

        # compute_gas_bytes_function stores its result in current_object2costs, so the returned
        # value is not needed here
        for function_name in cfg_object.functions:
            compute_gas_bytes_function(function_name, cfg_object.functions, current_object2costs)

        function2costs[object_id] = current_object2costs

    # The sub-object does not depend on the object being traversed, so it is processed once
    # after the loop instead of once per object
    sub_object = cfg.get_subobject()
    if sub_object is not None:
        function2costs.update(compute_gas_bytes(sub_object))

    return function2costs


def compute_gas_bytes_function(function_name: function_name_T, function_dict: Dict[function_name_T, CFGFunction],
                               function2costs: function2costs_T) -> costs_T:
    """
    Estimates the gas and size costs of the function named function_name, memoizing the result in
    function2costs. If the costs were already computed, the stored value is returned directly.

    Raises KeyError if function_name has no stored costs and does not appear in function_dict.
    """
    function_costs = function2costs.get(function_name, None)
    if function_costs is not None:
        return function_costs

    cfg_function = function_dict[function_name]

    # Calls to other functions are resolved recursively, so a function that (directly or indirectly)
    # calls itself would never terminate. We register a provisional zero cost before descending: a
    # call that closes a cycle contributes nothing to the estimation instead of recursing again.
    # NOTE: as a consequence, estimations for mutually recursive functions are approximate and
    # depend on which function of the cycle is visited first
    function2costs[function_name] = 0, 0

    # We need to keep track of which values are introduced and consumed, as we can count how many times they must
    # be duplicated. Initially, we have the elements passed as input
    previously_introduced = set(cfg_function.get_arguments())

    gas_cost, size_cost = compute_gas_bytes_block_list(cfg_function.blocks, function_dict,
                                                       function2costs, previously_introduced)

    # The provisional entry is replaced by the actual estimation
    function2costs[function_name] = gas_cost, size_cost
    return gas_cost, size_cost


def compute_gas_bytes_block_list(cfg_block_list: CFGBlockList, function_dict: Dict[function_name_T, CFGFunction],
                                 function2costs: function2costs_T, previously_introduced: Set[var_id_T]) -> costs_T:
    """
    Adds up the estimated gas and size costs of every block stored in the block list. The blocks are
    processed in the order in which they are stored, sharing the same previously_introduced set.
    """
    # The list is built eagerly so that the blocks are visited one after another before adding up
    costs_per_block = [compute_gas_bytes_block(current_block, function_dict, function2costs, previously_introduced)
                       for current_block in cfg_block_list.blocks.values()]

    total_gas = sum(block_gas for block_gas, _ in costs_per_block)
    total_size = sum(block_size for _, block_size in costs_per_block)
    return total_gas, total_size


def compute_gas_bytes_block(block: CFGBlock, function_dict: Dict[function_name_T, CFGFunction],
                            function2costs: function2costs_T, previously_introduced: Set[var_id_T]) -> costs_T:
    """
    Estimates the gas and size costs of the instructions in a block.

    For every instruction we account the cost of the operation itself (or the estimated costs of the
    callee if the operation is one of the functions in function_dict) plus the cost of placing its
    arguments. The set previously_introduced is updated in place: values are removed the first time
    they are consumed and the values produced by each instruction are added.
    """
    gas_cost, size_cost = 0, 0
    for instruction in block.get_instructions():
        op_name = instruction.get_op_name()

        # First we account the cost of the op name
        if op_name in function_dict:
            # Function call: the costs of the callee are ADDED to the ones accumulated so far
            # (assigning them would discard the costs of all the previous instructions in the block).
            # Already computed costs are reused directly
            callee_costs = function2costs.get(op_name)
            if callee_costs is None:
                callee_costs = compute_gas_bytes_function(op_name, function_dict, function2costs)
            gas_cost += callee_costs[0]
            size_cost += callee_costs[1]
        else:
            gas_cost += instruction.gas_spent_op
            size_cost += instruction.bytes_required

        # Then we consider that every argument must be either duplicated or pushed (for constants)
        # TODO: think more carefully if we can make some assumptions
        for in_value in instruction.get_in_args():
            if in_value.startswith("0x"):
                if in_value == "0x00":
                    # PUSH0 case
                    gas_cost += 2
                    size_cost += 1
                else:
                    # The size adds to the opcode byte the amount returned by get_push_number_hex
                    gas_cost += 3
                    size_cost += 1 + get_push_number_hex(in_value)
            elif in_value in previously_introduced:
                # First use of the value: it is consumed with no extra cost
                previously_introduced.remove(in_value)
            else:
                # Account for a DUPx
                gas_cost += 3
                size_cost += 1

        previously_introduced.update(instruction.get_out_args())

    return gas_cost, size_cost
128 changes: 98 additions & 30 deletions src/cfg_methods/function_inlining.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Module to perform function inlining.
"""
import json
from copy import deepcopy
from typing import Set, Dict, Tuple, List
from collections import defaultdict

Expand All @@ -10,23 +11,29 @@
from global_params.types import block_id_T, component_name_T, function_name_T, block_list_id_T
from parser.cfg_block import CFGBlock
from parser.cfg_block_list import CFGBlockList
from parser.cfg_function import CFGFunction
from parser.cfg_object import CFGObject
from parser.cfg import CFG
from cfg_methods.cfg_block_actions.inline_function import InlineFunction
from cfg_methods.utils import union_find_search
from cfg_methods.cost_computation import function2costs_T, compute_gas_bytes

# For each time a function is invoked, we store the position of the instruction (int) in the
# block (block_id_T) that appears in the block list (block_list_id_T)
function_call_info_T = Dict[str, List[Tuple[int, block_id_T, block_list_id_T]]]
call_info_T = Tuple[int, block_id_T, block_list_id_T]

function2call_info_T = Dict[str, List[call_info_T]]


def inline_functions(cfg: CFG) -> None:
"""
Inlines the functions that are invoked just in one place
"""
cfg_object2modify: Dict[component_name_T, function_call_info_T] = generate_function2information(cfg)
cfg_object2modify: Dict[component_name_T, function2call_info_T] = generate_function2information(cfg)
cfg_function2costs = compute_gas_bytes(cfg)

for object_id, cfg_object in cfg.objectCFG.items():
inline_functions_cfg_object(cfg_object, cfg_object2modify[object_id])
inline_functions_cfg_object(cfg_object, cfg_object2modify[object_id], cfg_function2costs[object_id])
sub_object = cfg.get_subobject()

if sub_object is not None:
Expand All @@ -35,7 +42,7 @@ def inline_functions(cfg: CFG) -> None:

# Methods to compute the invocation information

def generate_function2information(cfg: CFG) -> Dict[function_name_T, function_call_info_T]:
def generate_function2information(cfg: CFG) -> Dict[function_name_T, function2call_info_T]:
"""
For each cfg object, a dictionary is produced that links each function to the position, block and block list
in which it is used
Expand All @@ -60,7 +67,7 @@ def generate_function2information(cfg: CFG) -> Dict[function_name_T, function_ca


def generate_function2blocks_block_list(cfg_block_list: CFGBlockList, function_names: Set[function_name_T],
function2blocks: function_call_info_T) -> None:
function2blocks: function2call_info_T) -> None:
"""
Links the function calls that appear in the block list to the exact block and the block list
"""
Expand All @@ -71,7 +78,8 @@ def generate_function2blocks_block_list(cfg_block_list: CFGBlockList, function_n


# Methods to perform the inlining of cfg objects
def inline_functions_cfg_object(cfg_object: CFGObject, function_call_info: function_call_info_T):
def inline_functions_cfg_object(cfg_object: CFGObject, function_call_info: function2call_info_T,
function2costs: function2costs_T):
# Dict that maps each initial block name in the CFG to the set of blocks in which it can be split
block2current: Dict[block_id_T, List[block_id_T]] = dict()

Expand All @@ -80,37 +88,49 @@ def inline_functions_cfg_object(cfg_object: CFGObject, function_call_info: funct

for function_name, call_info in function_call_info.items():

cfg_function = cfg_object.functions[function_name]
# Only consider blocks for inlining that have just one invocation
if len(call_info) == 1:
instr_pos, cfg_block_name, cfg_block_list_name = call_info[0]
if len(call_info) == 1 or _must_be_inlined(function_name, call_info, function2costs,
len(cfg_function.exits)):

for call_idx, (instr_pos, cfg_block_name, cfg_block_list_name) in enumerate(call_info):

# First we find in which block list the function block list is stored
# As many substitutions can happen, we have to iterate recursively to find the most recent one
current_block_list_name = union_find_search(cfg_block_list_name, block_list2current)
print(current_block_list_name)
cfg_block_list = cfg_object.get_block_list(current_block_list_name)

# Then we determine whether the function has been split
split_blocks = block2current.get(cfg_block_name, [cfg_block_name])

# We have to determine the corresponding index if there are multiple blocks
if len(split_blocks) > 1:
split_block_index, position_index = _determine_idx(instr_pos, split_blocks, cfg_block_list)
else:
split_block_index = 0
position_index = instr_pos + _adjust_phi_function_idx_misalignment(cfg_block_list.blocks[split_blocks[split_block_index]])

# First we find in which block list the function block list is stored
# As many substitutions can happen, we have to iterate recursively to find the most recent one
current_block_list_name = union_find_search(cfg_block_list_name, block_list2current)
cfg_block_list = cfg_object.get_block_list(current_block_list_name)
function_to_inline, renaming_dict = _generate_function_to_inline(cfg_function, call_idx, len(call_info))

# Then we determine whether the function has been split
split_blocks = block2current.get(cfg_block_name, [cfg_block_name])
inline_action = InlineFunction(position_index, cfg_block_list.blocks[split_blocks[split_block_index]],
cfg_block_list, function_to_inline)

# We have to determine the corresponding index if there are multiple blocks
if len(split_blocks) > 1:
split_block_index, position_index = _determine_idx(instr_pos, split_blocks, cfg_block_list)
else:
split_block_index = 0
position_index = instr_pos + _adjust_phi_function_idx_misalignment(cfg_block_list.blocks[split_blocks[split_block_index]])
inline_action.perform_action()

inline_action = InlineFunction(position_index, cfg_block_list.blocks[split_blocks[split_block_index]],
cfg_block_list, function_name, cfg_object)
inline_action.perform_action()
# Uncomment for validation
# is_correct, reason = validate_block_list_comes_from(cfg_block_list)

# Uncomment for validation
# is_correct, reason = validate_block_list_comes_from(cfg_block_list)
# Finally, we have to update the information of both the block lists and blocks
new_function_name = cfg_function.name

# Finally, we have to update the information of both the block lists and blocks
block_list2current[function_name] = current_block_list_name
block2current[cfg_block_name] = split_blocks[:split_block_index] + \
[inline_action.first_sub_block.block_id,
inline_action.second_sub_block.block_id] + split_blocks[split_block_index+1:]
block_list2current[new_function_name] = current_block_list_name
block2current[cfg_block_name] = split_blocks[:split_block_index] + \
[inline_action.first_sub_block.block_id,
inline_action.second_sub_block.block_id] + split_blocks[
split_block_index + 1:]
# As we have decided to inline, we can just remove it from the list of functions
cfg_object.functions.pop(function_name)


def _determine_idx(instr_idx: int, split_block_names: List[block_id_T], cfg_block_list: CFGBlockList) \
Expand Down Expand Up @@ -138,3 +158,51 @@ def _adjust_phi_function_idx_misalignment(block: CFGBlock) -> int:
# Here we need to reassign the index considering the preceding phi functions in the block, as
# we have skipped them
return len([True for instr in block.get_instructions() if instr.get_op_name() == "PhiFunction"])


def _must_be_inlined(function_name: function_name_T, call_info_list: List[call_info_T], function2costs: function2costs_T,
n_function_exits: int):
"""
Returns whether a function must be inlined or not, according to the call and costs info
"""
gas_cost, size_cost = function2costs[function_name]

# "Extra costs" with no inlining: introducing two tags + 2 JUMPDEST + 1 entry jump + multiple exit jumps
no_inlining_extra_gas = (3 * 3) + 2 * 1 + 3 * (1 + n_function_exits)

# Assuming the tags take 2 bytes
no_inlining_extra_size = (3 * 3) + 2 * 1 + (1 + n_function_exits)

# "Extra costs" with inlining: number of bytes duplicated by number of calls
inlining_extra_size = size_cost * (len(call_info_list) - 1)

# Decision for whether a function must be inlined or not

# Heuristics: 20 bytes = 1 gas
# TODO: devise good heuristics for inlining
return (inlining_extra_size - no_inlining_extra_size) <= 20 * no_inlining_extra_gas


def _generate_function_to_inline(original_function: CFGFunction, current_call_idx: int,
n_calls: int) -> Tuple[CFGFunction, Dict[block_id_T, block_id_T]]:
"""
We must rename the blocks when inlining to avoid conflicts, as the function can be inlined multiple times in the
same function (and hence, the same blocks would appear multiple times). We also return the renaming dict
"""
# If there is just one call, we avoid renaming the blocks
if n_calls == 1:
return original_function, dict()
# If we are making multiple copies, we copy it call_idx - 1 times, as the last one should remove it
elif current_call_idx == n_calls - 1:
copied_function = original_function
else:
copied_function = deepcopy(original_function)

# We have to modify the block list inside the copied function first
block_list = copied_function.blocks
renaming_dict = {block_name: f"{block_name}_copy_{current_call_idx}" for block_name in block_list.blocks}
block_list.rename_blocks(renaming_dict)

copied_function.exits = [renaming_dict.get(exit_id, exit_id) for exit_id in copied_function.exits]
copied_function.name = f"{copied_function.name}_copy_{current_call_idx}"
return copied_function, renaming_dict
3 changes: 3 additions & 0 deletions src/parser/cfg_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def set_condition(self, cond: var_id_T) -> None:
def get_block_id(self) -> str:
    """
    Returns the identifier currently stored in this block
    """
    return self.block_id

def set_block_id(self, value: var_id_T) -> None:
    """
    Replaces the identifier stored in this block with the given value
    """
    # NOTE(review): the parameter is annotated as var_id_T although it holds a block identifier;
    # presumably block_id_T was intended -- TODO confirm
    self.block_id = value

def get_instructions(self) -> List[CFGInstruction]:
    """
    Returns the list of instructions stored in this block (the list itself, not a copy)
    """
    return self._instructions

Expand Down
Loading

0 comments on commit 0f24141

Please sign in to comment.