From 502f42771de6f9e5038bec32b224e84221d66532 Mon Sep 17 00:00:00 2001 From: Caleb Date: Wed, 23 Aug 2023 17:57:22 -0400 Subject: [PATCH] correct implementation of separate relu unit --- calyx-py/calyx/builder.py | 8 + frontends/systolic-lang/gen-systolic.py | 264 +++---- tests/frontend/systolic/array-1.expect | 22 +- tests/frontend/systolic/array-2.expect | 32 +- tests/frontend/systolic/array-3.expect | 38 +- z.futil | 951 ------------------------ 6 files changed, 172 insertions(+), 1143 deletions(-) delete mode 100644 z.futil diff --git a/calyx-py/calyx/builder.py b/calyx-py/calyx/builder.py index b3eab910bf..d5322b21b3 100644 --- a/calyx-py/calyx/builder.py +++ b/calyx-py/calyx/builder.py @@ -172,6 +172,14 @@ def get_group(self, name: str) -> GroupBuilder: f"Group `{name}' not found in component {self.component.name}" ) + def try_get_group(self, name: str) -> GroupBuilder: + """Tries to get a group builder by name. If cannot find it, return None""" + out = self.index.get(name) + if out and isinstance(out, GroupBuilder): + return out + else: + return None + def group(self, name: str, static_delay: Optional[int] = None) -> GroupBuilder: """Create a new group with the given name and (optional) static delay.""" group = ast.Group(ast.CompVar(name), connections=[], static_delay=static_delay) diff --git a/frontends/systolic-lang/gen-systolic.py b/frontends/systolic-lang/gen-systolic.py index 7566ab39d8..8807a9f3bc 100755 --- a/frontends/systolic-lang/gen-systolic.py +++ b/frontends/systolic-lang/gen-systolic.py @@ -16,6 +16,21 @@ PE_NAME = "mac_pe" DEPTH = "depth" +# Naming scheme for generated groups. Used to keep group names consistent +# across structure and control. +NAME_SCHEME = { + # Indexing into the memory + "index name": "{prefix}_idx", + "index init": "{prefix}_idx_init", + "index update": "{prefix}_idx_update", + # Move data from main memories + "memory move": "{prefix}_move", + "out mem move": "{pe}_out_write", + # Move data between internal registers + "register move down": "{pe}_down_move", + "register move right": "{pe}_right_move", +} + class CalyxAdd: """ @@ -37,13 +52,6 @@ def __eq__(self, other): def __hash__(self): return hash(self.const) - def __repr__(self): - return ( - str(cb.ExprBuilder.unwrap(self.port).item.id.name) - + "_plus_" - + str(self.const) - ) - def __str__(self): return ( str(cb.ExprBuilder.unwrap(self.port).item.id.name) @@ -51,6 +59,19 @@ def __str__(self): + str(self.const) ) + def build_group(self, comp: cb.ComponentBuilder) -> cb.GroupBuilder: + """ + Builds a static Calyx group (latency 1) that implemnets `self` + Note that we avoid creating duplicate groups. + """ + group_name = str(self) + "_group" + if comp.try_get_group(group_name) is None: + add = comp.add(BITWIDTH, str(self)) + with comp.static_group(group_name, 1): + add.left = self.port + add.right = self.const + return group_name + def pe(prog: cb.Builder, leaky_relu): comp = prog.component(name=PE_NAME, latency=1) @@ -95,8 +116,9 @@ def leaky_relu_comp(prog: cb.Builder): comp = prog.component(name="leaky_relu") comp.input("value", BITWIDTH) comp.input("index", BITWIDTH) - add_write_mem_argument(comp, OUT_MEM, BITWIDTH) - add_register_argument(comp, "idx_reg") + # Takes a memory and register (i.e., arguments that essentially act as ref cells) + add_write_mem_arguments(comp, OUT_MEM, BITWIDTH) + add_register_arguments(comp, "idx_reg") this = comp.this() @@ -120,43 +142,58 @@ def leaky_relu_comp(prog: cb.Builder): # a) multiplier is done, so we write fp_mult.out to mem # b) this.value >=0 (i.e., !(this.value < 0)) so we write this.value to mem write_mem.in_ = (fp_mult.done | ~lt.out) @ 1 - # trigger the multiplier when we're not writing to memory + # Trigger the multiplier when we're not writing to memory. fp_mult.left = numeric_types.FixedPoint( str(float_to_fixed_point(0.01, FRACWIDTH)), BITWIDTH, INTWIDTH, True ).unsigned_integer() fp_mult.right = this.value fp_mult.go = ~(write_mem.out) @ 1 - # increment idx_reg while we are writing to memory. + # Increment idx_reg during the cycle that we write to memory. incr_idx.left = this.idx_reg_out incr_idx.right = 1 this.idx_reg_in = write_mem.out @ incr_idx.out this.idx_reg_write_en = write_mem.out @ 1 - # write to memory + # Write to memory. g.asgn(write_en_port, 1, write_mem.out) g.asgn(addr0_port, this.index) g.asgn(write_data_port, this.value, ~lt.out) g.asgn(write_data_port, fp_mult.out, lt.out) + # Groups is done once we have written to memory. g.done = write_done_port comp.control = py_ast.Enable("do_relu") -# Naming scheme for generated groups. Used to keep group names consistent -# across structure and control. -NAME_SCHEME = { - # Indexing into the memory - "index name": "{prefix}_idx", - "index init": "{prefix}_idx_init", - "index update": "{prefix}_idx_update", - # Move data from main memories - "memory move": "{prefix}_move", - "out mem move": "{pe}_out_write", - # Move data between internal registers - "register move down": "{pe}_down_move", - "register move right": "{pe}_right_move", -} +def add_read_mem_arguments(comp: cb.ComponentBuilder, name, addr_width): + """ + Add arguments to component `comp` if we want to read from a mem named `name` with + width of `addr_width` + """ + comp.input(f"{name}_read_data", BITWIDTH) + comp.output(f"{name}_addr0", addr_width) + + +def add_write_mem_arguments(comp: cb.ComponentBuilder, name, addr_width): + """ + Add arguments to component `comp` if we want to write to a mem named `name` with + width of `addr_width` inside `comp.` + """ + comp.output(f"{name}_addr0", addr_width) + comp.output(f"{name}_write_data", BITWIDTH) + comp.output(f"{name}_write_en", 1) + comp.input(f"{name}_done", 1) + + +def add_register_arguments(comp: cb.ComponentBuilder, name): + """ + Add arguments to component `comp` if we want to use a register named + `name` inside `comp.` + """ + comp.output(f"{name}_write_en", 1) + comp.output(f"{name}_in", BITWIDTH) + comp.input(f"{name}_out", BITWIDTH) def instantiate_indexor(comp: cb.ComponentBuilder, prefix, width) -> cb.CellBuilder: @@ -192,36 +229,6 @@ def instantiate_indexor(comp: cb.ComponentBuilder, prefix, width) -> cb.CellBuil return reg -def add_read_mem_argument(comp: cb.ComponentBuilder, name, addr_width): - """ - Add arguments to component `comp` if we want to read from a mem named `name` with - width of `addr_width` - """ - comp.input(f"{name}_read_data", BITWIDTH) - comp.output(f"{name}_addr0", addr_width) - - -def add_write_mem_argument(comp: cb.ComponentBuilder, name, addr_width): - """ - Add arguments to component `comp` if we want to write to a mem named `name` with - width of `addr_width` inside `comp.` - """ - comp.output(f"{name}_addr0", addr_width) - comp.output(f"{name}_write_data", BITWIDTH) - comp.output(f"{name}_write_en", 1) - comp.input(f"{name}_done", 1) - - -def add_register_argument(comp: cb.ComponentBuilder, name): - """ - Add arguments to component `comp` if we want to write to use a register named - `name` inside `comp.` - """ - comp.output(f"{name}_write_en", 1) - comp.output(f"{name}_in", BITWIDTH) - comp.input(f"{name}_out", BITWIDTH) - - def instantiate_memory(comp: cb.ComponentBuilder, top_or_left, idx, size): """ Instantiates: @@ -241,7 +248,7 @@ def instantiate_memory(comp: cb.ComponentBuilder, top_or_left, idx, size): idx_width = bits_needed(size) # Instantiate the memory - add_read_mem_argument(comp, name, idx_width) + add_read_mem_arguments(comp, name, idx_width) this = comp.this() addr0_port = cb.ExprBuilder.unwrap(this.port(name + "_addr0")) read_data_port = this.port(name + "_read_data") @@ -310,30 +317,38 @@ def instantiate_output_move(comp: cb.ComponentBuilder, row, col, cols): g.asgn(write_en_port, 1) -def instantiate_relu_cond_reg( - comp: cb.ComponentBuilder, - num_rows, -): +def instantiate_cond_reg_group(comp: cb.ComponentBuilder, num_rows, leaky_relu): """ Writes into `cond_reg`, the condition register for the while loop. - `cond_reg` basically checks whether the relu operation has finished yet - for all rows of the array. If so, it sets `cond_reg` to lo. Otherwise it - sets it to high. + For leaky relu, it checks whether all rows have finished with their + relu operations. + For the non leaky relu, it checks the iteration count. """ cond_reg = comp.get_cell("cond_reg") - cond_wire = comp.wire("cond_wire", 1) - for r in range(num_rows): - relu_finished_wire = comp.get_cell(f"relu_finished_wire_r{r}") - if r == 0: - guard = relu_finished_wire.out - else: - guard = guard & relu_finished_wire.out - - with comp.static_group("write_cond_reg", 1): - cond_wire.in_ = guard @ 1 - cond_reg.in_ = ~cond_wire.out @ 1 - cond_reg.in_ = cond_wire.out @ 0 - cond_reg.write_en = 1 + if leaky_relu: + # Check if all relu operations have finished for each row + cond_wire = comp.wire("cond_wire", 1) + for r in range(num_rows): + relu_finished_wire = comp.get_cell(f"relu_finished_wire_r{r}") + if r == 0: + guard = relu_finished_wire.out + else: + guard = guard & relu_finished_wire.out + with comp.static_group("write_cond_reg", 1): + cond_wire.in_ = guard @ 1 + cond_reg.in_ = ~cond_wire.out @ 1 + cond_reg.in_ = cond_wire.out @ 0 + cond_reg.write_en = 1 + else: + # Check iteration count + iter_limit = comp.get_cell("iter_limit") + add = comp.get_cell("idx_add") + lt_iter_limit = comp.lt(BITWIDTH, "lt_iter_limit") + with comp.static_group("lt_iter_limit_group", 1): + lt_iter_limit.left = add.out + lt_iter_limit.right = iter_limit.out + cond_reg.in_ = lt_iter_limit.out + cond_reg.write_en = 1 def gen_schedules( @@ -420,35 +435,22 @@ def accum_nec_ranges(nec_ranges, schedule): return nec_ranges -def try_build_calyx_add(comp, obj): - """ - Attempts to build an adder for obj, with name str(obj) and group name - str(obj) + "_group" that adds obj.port and obj.const - Returns true if we actually build it - Returns false otherwise - """ - if type(obj) == CalyxAdd: - add_str = str(obj) - if comp.try_get_cell(add_str) is None: - add = comp.add(BITWIDTH, add_str) - with comp.static_group(add_str + "_group", 1): - add.left = obj.port - add.right = obj.const - return True - return False - - -def instantiate_calyx_adds(comp, nec_ranges): +def instantiate_calyx_adds(comp, nec_ranges) -> list: """ Instantiates the CalyxAdds objects to adders and actual groups that add things """ - depth_adders = [] + calyx_add_groups = set() for lo, hi in nec_ranges: - if try_build_calyx_add(comp, lo): - depth_adders.append(str(lo) + "_group") - if try_build_calyx_add(comp, hi): - depth_adders.append(str(hi) + "_group") - return depth_adders + if type(lo) == CalyxAdd: + group_name = lo.build_group(comp) + calyx_add_groups.add(group_name) + if type(hi) == CalyxAdd: + group_name = hi.build_group(comp) + calyx_add_groups.add(group_name) + group_list = list(calyx_add_groups) + # sort for testing purposes + group_list.sort() + return group_list def instantiate_idx_cond_groups(comp: cb.ComponentBuilder, leaky_relu): @@ -471,17 +473,6 @@ def instantiate_idx_cond_groups(comp: cb.ComponentBuilder, leaky_relu): with comp.static_group("init_cond_reg", 1): cond_reg.in_ = 1 cond_reg.write_en = 1 - # Only check iter_limit if not leaky_relu. - # For leaky_relu we don't check iterations, we check if the relu - # operations are finished yet - if not leaky_relu: - iter_limit = comp.get_cell("iter_limit") - lt_iter_limit = comp.lt(BITWIDTH, "lt_iter_limit") - with comp.static_group("lt_iter_limit_group", 1): - lt_iter_limit.left = add.out - lt_iter_limit.right = iter_limit.out - cond_reg.in_ = lt_iter_limit.out - cond_reg.write_en = 1 def init_dyn_vals(comp: cb.ComponentBuilder, depth_port, rem_iter_limit, leaky_relu): @@ -626,7 +617,7 @@ def build_assignment( # Wire that tells us we are finished with relu operation for this row. relu_finished_wire = comp.wire(f"relu_finished_wire_r{row}", 1) - # Annoying memory port stuff because we can't use ref cells + # Annoying memory stuff because we can't use ref cells this = comp.this() mem_name = OUT_MEM + f"_{row}" addr0_port = cb.ExprBuilder.unwrap(this.port(mem_name + "_addr0")) @@ -729,22 +720,6 @@ def execute_if_between(comp: cb.ComponentBuilder, start, end, body): ] -def execute_if_register(comp: cb.ComponentBuilder, register, body): - """ - body is a list of control stmts - if body is empty, return an empty list - otherwise, builds an if stmt that executes body in parallel reg.out is high - """ - if not body: - return [] - return [ - cb.static_if( - register.out, - py_ast.StaticParComp(body), - ) - ] - - def generate_control( comp: cb.ComponentBuilder, top_length, @@ -752,7 +727,7 @@ def generate_control( left_length, left_depth, schedules, - depth_adders, + calyx_add_groups, nec_ranges, leaky_relu, ): @@ -785,10 +760,6 @@ def generate_control( py_ast.Enable("init_min_depth"), py_ast.Enable("init_cond_reg"), ] - # + [ - # py_ast.Enable(f"init_idx_between_{lo}_{hi}") - # for (lo, hi) in filter(lambda x: x[1] is not None, nec_ranges) - # ] + [py_ast.Enable(f"init_idx_between_{lo}_{hi}") for (lo, hi) in nec_ranges] ) if not leaky_relu: @@ -879,8 +850,8 @@ def counter(): # build the control stmts that assign correct values to # idx_between_{start}_{end}_reg, which is what the if stmts above^ rely on incr_stmts.append(py_ast.Enable(f"idx_between_{start}_{end}_group")) - for depth_adder_group in depth_adders: - incr_stmts.append(py_ast.Enable(depth_adder_group)) + for calyx_add_group in calyx_add_groups: + incr_stmts.append(py_ast.Enable(calyx_add_group)) while_ctrl = [py_ast.StaticParComp(control_stmts), py_ast.StaticParComp(incr_stmts)] if leaky_relu: @@ -929,7 +900,7 @@ def create_systolic_array( nec_ranges = set() for sched in schedules.values(): accum_nec_ranges(nec_ranges, sched) - depth_adders = instantiate_calyx_adds(computational_unit, nec_ranges) + calyx_add_groups = instantiate_calyx_adds(computational_unit, nec_ranges) for row in range(left_length): for col in range(top_length): @@ -946,7 +917,7 @@ def create_systolic_array( idx_width = BITWIDTH # Instantiate output memory for i in range(left_length): - add_write_mem_argument(computational_unit, OUT_MEM + f"_{i}", idx_width) + add_write_mem_arguments(computational_unit, OUT_MEM + f"_{i}", idx_width) # Instantiate all the PEs for row in range(left_length): @@ -973,11 +944,12 @@ def create_systolic_array( instantiate_init_group(computational_unit, start, end) if leaky_relu: - # Instantiate groups to compute Relu. + # Instantiate groups to compute relu. for row in range(left_length): instantiate_relu_groups(computational_unit, row, top_length) - # Write into the cond reg of the while loop. - instantiate_relu_cond_reg(computational_unit, left_length) + + # Instantiate group that writes into the cond reg of the while loop. + instantiate_cond_reg_group(computational_unit, left_length, leaky_relu) # Generate the control and set the source map control, source_map = generate_control( @@ -987,16 +959,16 @@ def create_systolic_array( left_length, left_depth, schedules, - depth_adders, + calyx_add_groups, nec_ranges, leaky_relu, ) computational_unit.control = control prog.program.meta = source_map - # build the main component - # instantaites the systolic array/computational_unit and the mems, - # and then invokes it + # Build the main component. + # Instantiates the systolic array/computational_unit and the mems, + # and then invokes it. main = prog.component("main") systolic_array = main.cell("systolic_array", computational_unit) invoke_args = {} diff --git a/tests/frontend/systolic/array-1.expect b/tests/frontend/systolic/array-1.expect index 62729030f1..2619029ec5 100644 --- a/tests/frontend/systolic/array-1.expect +++ b/tests/frontend/systolic/array-1.expect @@ -27,7 +27,7 @@ static<1> component mac_pe(top: 32, left: 32, mul_ready: 1) -> (out: 32) { } } } -component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> (t0_addr0: 2, l0_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1) { +component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32, out_mem_0_done: 1) -> (t0_addr0: 2, l0_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1) { cells { min_depth_4 = std_reg(32); lt_depth_4 = std_lt(32); @@ -48,7 +48,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> idx = std_reg(32); idx_add = std_add(32); cond_reg = std_reg(1); - lt_iter_limit = std_lt(32); idx_between_5_depth_plus_5_reg = std_reg(1); index_lt_depth_plus_5 = std_lt(32); index_ge_5 = std_ge(32); @@ -66,6 +65,7 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> index_lt_depth_plus_6 = std_lt(32); index_ge_depth_plus_5 = std_ge(32); idx_between_depth_plus_5_depth_plus_6_comb = std_and(1); + lt_iter_limit = std_lt(32); } wires { static<1> group init_min_depth { @@ -150,12 +150,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> cond_reg.in = 1'd1; cond_reg.write_en = 1'd1; } - static<1> group lt_iter_limit_group { - lt_iter_limit.left = idx_add.out; - lt_iter_limit.right = iter_limit.out; - cond_reg.in = lt_iter_limit.out; - cond_reg.write_en = 1'd1; - } static<1> group idx_between_5_depth_plus_5_group { index_ge_5.left = idx_add.out; index_ge_5.right = 32'd5; @@ -222,6 +216,12 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> idx_between_depth_plus_5_depth_plus_6_reg.in = 1'd0; idx_between_depth_plus_5_depth_plus_6_reg.write_en = 1'd1; } + static<1> group lt_iter_limit_group { + lt_iter_limit.left = idx_add.out; + lt_iter_limit.right = iter_limit.out; + cond_reg.in = lt_iter_limit.out; + cond_reg.write_en = 1'd1; + } } control { seq { @@ -275,11 +275,11 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, l0_read_data: 32) -> idx_between_1_depth_plus_1_group; idx_between_1_min_depth_4_plus_1_group; idx_between_depth_plus_5_depth_plus_6_group; - depth_plus_5_group; depth_plus_0_group; depth_plus_1_group; - min_depth_4_plus_1_group; + depth_plus_5_group; depth_plus_6_group; + min_depth_4_plus_1_group; } } } @@ -297,7 +297,7 @@ component main() -> () { } control { - invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, l0_read_data=l0.read_data)(t0_addr0=t0.addr0, l0_addr0=l0.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en); + invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, l0_read_data=l0.read_data, out_mem_0_done=out_mem_0.done)(t0_addr0=t0.addr0, l0_addr0=l0.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en); } } metadata #{ diff --git a/tests/frontend/systolic/array-2.expect b/tests/frontend/systolic/array-2.expect index e0682f0620..1dcba310cc 100644 --- a/tests/frontend/systolic/array-2.expect +++ b/tests/frontend/systolic/array-2.expect @@ -27,7 +27,7 @@ static<1> component mac_pe(top: 32, left: 32, mul_ready: 1) -> (out: 32) { } } } -component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_read_data: 32, l1_read_data: 32) -> (t0_addr0: 2, t1_addr0: 2, l0_addr0: 2, l1_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1, out_mem_1_addr0: 32, out_mem_1_write_data: 32, out_mem_1_write_en: 1) { +component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_read_data: 32, l1_read_data: 32, out_mem_0_done: 1, out_mem_1_done: 1) -> (t0_addr0: 2, t1_addr0: 2, l0_addr0: 2, l1_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1, out_mem_1_addr0: 32, out_mem_1_write_data: 32, out_mem_1_write_en: 1) { cells { min_depth_4 = std_reg(32); lt_depth_4 = std_lt(32); @@ -67,7 +67,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_ idx = std_reg(32); idx_add = std_add(32); cond_reg = std_reg(1); - lt_iter_limit = std_lt(32); idx_between_5_depth_plus_5_reg = std_reg(1); index_lt_depth_plus_5 = std_lt(32); index_ge_5 = std_ge(32); @@ -113,6 +112,7 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_ index_lt_depth_plus_8 = std_lt(32); index_ge_depth_plus_7 = std_ge(32); idx_between_depth_plus_7_depth_plus_8_comb = std_and(1); + lt_iter_limit = std_lt(32); } wires { static<1> group init_min_depth { @@ -282,12 +282,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_ cond_reg.in = 1'd1; cond_reg.write_en = 1'd1; } - static<1> group lt_iter_limit_group { - lt_iter_limit.left = idx_add.out; - lt_iter_limit.right = iter_limit.out; - cond_reg.in = lt_iter_limit.out; - cond_reg.write_en = 1'd1; - } static<1> group idx_between_5_depth_plus_5_group { index_ge_5.left = idx_add.out; index_ge_5.right = 32'd5; @@ -466,6 +460,12 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_ idx_between_depth_plus_7_depth_plus_8_reg.in = 1'd0; idx_between_depth_plus_7_depth_plus_8_reg.write_en = 1'd1; } + static<1> group lt_iter_limit_group { + lt_iter_limit.left = idx_add.out; + lt_iter_limit.right = iter_limit.out; + cond_reg.in = lt_iter_limit.out; + cond_reg.write_en = 1'd1; + } } control { seq { @@ -616,17 +616,17 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, l0_ idx_between_2_min_depth_4_plus_2_group; idx_between_6_depth_plus_6_group; idx_between_depth_plus_7_depth_plus_8_group; - depth_plus_5_group; - depth_plus_2_group; - depth_plus_7_group; depth_plus_0_group; - min_depth_4_plus_1_group; depth_plus_1_group; - depth_plus_6_group; - min_depth_4_plus_3_group; + depth_plus_2_group; depth_plus_3_group; - min_depth_4_plus_2_group; + depth_plus_5_group; + depth_plus_6_group; + depth_plus_7_group; depth_plus_8_group; + min_depth_4_plus_1_group; + min_depth_4_plus_2_group; + min_depth_4_plus_3_group; } } } @@ -647,7 +647,7 @@ component main() -> () { } control { - invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, t1_read_data=t1.read_data, l0_read_data=l0.read_data, l1_read_data=l1.read_data)(t0_addr0=t0.addr0, t1_addr0=t1.addr0, l0_addr0=l0.addr0, l1_addr0=l1.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en, out_mem_1_addr0=out_mem_1.addr0, out_mem_1_write_data=out_mem_1.write_data, out_mem_1_write_en=out_mem_1.write_en); + invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, t1_read_data=t1.read_data, l0_read_data=l0.read_data, l1_read_data=l1.read_data, out_mem_0_done=out_mem_0.done, out_mem_1_done=out_mem_1.done)(t0_addr0=t0.addr0, t1_addr0=t1.addr0, l0_addr0=l0.addr0, l1_addr0=l1.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en, out_mem_1_addr0=out_mem_1.addr0, out_mem_1_write_data=out_mem_1.write_data, out_mem_1_write_en=out_mem_1.write_en); } } metadata #{ diff --git a/tests/frontend/systolic/array-3.expect b/tests/frontend/systolic/array-3.expect index 580d914d01..ddfd38050d 100644 --- a/tests/frontend/systolic/array-3.expect +++ b/tests/frontend/systolic/array-3.expect @@ -27,7 +27,7 @@ static<1> component mac_pe(top: 32, left: 32, mul_ready: 1) -> (out: 32) { } } } -component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_read_data: 32, l0_read_data: 32, l1_read_data: 32, l2_read_data: 32) -> (t0_addr0: 2, t1_addr0: 2, t2_addr0: 2, l0_addr0: 2, l1_addr0: 2, l2_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1, out_mem_1_addr0: 32, out_mem_1_write_data: 32, out_mem_1_write_en: 1, out_mem_2_addr0: 32, out_mem_2_write_data: 32, out_mem_2_write_en: 1) { +component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_read_data: 32, l0_read_data: 32, l1_read_data: 32, l2_read_data: 32, out_mem_0_done: 1, out_mem_1_done: 1, out_mem_2_done: 1) -> (t0_addr0: 2, t1_addr0: 2, t2_addr0: 2, l0_addr0: 2, l1_addr0: 2, l2_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1, out_mem_1_addr0: 32, out_mem_1_write_data: 32, out_mem_1_write_en: 1, out_mem_2_addr0: 32, out_mem_2_write_data: 32, out_mem_2_write_en: 1) { cells { min_depth_4 = std_reg(32); lt_depth_4 = std_lt(32); @@ -91,7 +91,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_ idx = std_reg(32); idx_add = std_add(32); cond_reg = std_reg(1); - lt_iter_limit = std_lt(32); idx_between_depth_plus_8_depth_plus_9_reg = std_reg(1); index_lt_depth_plus_9 = std_lt(32); index_ge_depth_plus_8 = std_ge(32); @@ -161,6 +160,7 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_ idx_between_depth_plus_7_depth_plus_8_reg = std_reg(1); index_ge_depth_plus_7 = std_ge(32); idx_between_depth_plus_7_depth_plus_8_comb = std_and(1); + lt_iter_limit = std_lt(32); } wires { static<1> group init_min_depth { @@ -437,12 +437,6 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_ cond_reg.in = 1'd1; cond_reg.write_en = 1'd1; } - static<1> group lt_iter_limit_group { - lt_iter_limit.left = idx_add.out; - lt_iter_limit.right = iter_limit.out; - cond_reg.in = lt_iter_limit.out; - cond_reg.write_en = 1'd1; - } static<1> group idx_between_depth_plus_8_depth_plus_9_group { index_ge_depth_plus_8.left = idx_add.out; index_ge_depth_plus_8.right = depth_plus_8.out; @@ -719,6 +713,12 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_ idx_between_depth_plus_7_depth_plus_8_reg.in = 1'd0; idx_between_depth_plus_7_depth_plus_8_reg.write_en = 1'd1; } + static<1> group lt_iter_limit_group { + lt_iter_limit.left = idx_add.out; + lt_iter_limit.right = iter_limit.out; + cond_reg.in = lt_iter_limit.out; + cond_reg.write_en = 1'd1; + } } control { seq { @@ -1010,22 +1010,22 @@ component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_ idx_between_1_min_depth_4_plus_1_group; idx_between_6_depth_plus_6_group; idx_between_depth_plus_7_depth_plus_8_group; - depth_plus_8_group; - depth_plus_9_group; - min_depth_4_plus_2_group; + depth_plus_0_group; + depth_plus_10_group; + depth_plus_1_group; depth_plus_2_group; - depth_plus_7_group; depth_plus_3_group; - min_depth_4_plus_3_group; + depth_plus_4_group; depth_plus_5_group; depth_plus_6_group; - depth_plus_10_group; - depth_plus_4_group; + depth_plus_7_group; + depth_plus_8_group; + depth_plus_9_group; + min_depth_4_plus_1_group; + min_depth_4_plus_2_group; + min_depth_4_plus_3_group; min_depth_4_plus_4_group; min_depth_4_plus_5_group; - depth_plus_0_group; - depth_plus_1_group; - min_depth_4_plus_1_group; } } } @@ -1049,7 +1049,7 @@ component main() -> () { } control { - invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, t1_read_data=t1.read_data, t2_read_data=t2.read_data, l0_read_data=l0.read_data, l1_read_data=l1.read_data, l2_read_data=l2.read_data)(t0_addr0=t0.addr0, t1_addr0=t1.addr0, t2_addr0=t2.addr0, l0_addr0=l0.addr0, l1_addr0=l1.addr0, l2_addr0=l2.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en, out_mem_1_addr0=out_mem_1.addr0, out_mem_1_write_data=out_mem_1.write_data, out_mem_1_write_en=out_mem_1.write_en, out_mem_2_addr0=out_mem_2.addr0, out_mem_2_write_data=out_mem_2.write_data, out_mem_2_write_en=out_mem_2.write_en); + invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, t1_read_data=t1.read_data, t2_read_data=t2.read_data, l0_read_data=l0.read_data, l1_read_data=l1.read_data, l2_read_data=l2.read_data, out_mem_0_done=out_mem_0.done, out_mem_1_done=out_mem_1.done, out_mem_2_done=out_mem_2.done)(t0_addr0=t0.addr0, t1_addr0=t1.addr0, t2_addr0=t2.addr0, l0_addr0=l0.addr0, l1_addr0=l1.addr0, l2_addr0=l2.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en, out_mem_1_addr0=out_mem_1.addr0, out_mem_1_write_data=out_mem_1.write_data, out_mem_1_write_en=out_mem_1.write_en, out_mem_2_addr0=out_mem_2.addr0, out_mem_2_write_data=out_mem_2.write_data, out_mem_2_write_en=out_mem_2.write_en); } } metadata #{ diff --git a/z.futil b/z.futil deleted file mode 100644 index cb46f7f3fd..0000000000 --- a/z.futil +++ /dev/null @@ -1,951 +0,0 @@ -import "primitives/core.futil"; -import "primitives/binary_operators.futil"; -import "primitives/pipelined.futil"; -static<1> component mac_pe(top: 32, left: 32, mul_ready: 1) -> (out: 32) { - cells { - acc = std_reg(32); - adder = std_fp_sadd(32, 16, 16); - mul = pipelined_fp_smult(32, 16, 16); - } - wires { - static<1> group do_add { - adder.left = acc.out; - adder.right = mul.out; - acc.in = adder.out; - acc.write_en = mul_ready; - } - static<1> group do_mul { - mul.left = top; - mul.right = left; - } - out = acc.out; - } - control { - static par { - do_add; - do_mul; - } - } -} -component leaky_relu(value: 32, index: 32, out_mem_done: 1, idx_reg_out: 32) -> (out_mem_addr0: 32, out_mem_write_data: 32, out_mem_write_en: 1, idx_reg_write_en: 1, idx_reg_in: 32) { - cells { - fp_mult = std_fp_smult_pipe(32, 16, 16); - val_lt = std_fp_slt(32, 16, 16); - incr_idx = std_add(32); - write_mem = std_wire(1); - } - wires { - val_lt.left = value; - val_lt.right = 32'd0; - group do_relu { - write_mem.in = fp_mult.done | !val_lt.out ? 1'd1; - fp_mult.left = 32'd655; - fp_mult.right = value; - fp_mult.go = !write_mem.out ? 1'd1; - incr_idx.left = idx_reg_out; - incr_idx.right = 32'd1; - idx_reg_in = write_mem.out ? incr_idx.out; - idx_reg_write_en = write_mem.out ? 1'd1; - out_mem_write_en = write_mem.out ? 1'd1; - out_mem_addr0 = index; - out_mem_write_data = !val_lt.out ? value; - out_mem_write_data = val_lt.out ? fp_mult.out; - do_relu[done] = out_mem_done; - } - } - control { - do_relu; - } -} -component systolic_array_comp(depth: 32, t0_read_data: 32, t1_read_data: 32, t2_read_data: 32, t3_read_data: 32, l0_read_data: 32, l1_read_data: 32, out_mem_0_done: 1, out_mem_1_done: 1) -> (t0_addr0: 2, t1_addr0: 2, t2_addr0: 2, t3_addr0: 2, l0_addr0: 2, l1_addr0: 2, out_mem_0_addr0: 32, out_mem_0_write_data: 32, out_mem_0_write_en: 1, out_mem_1_addr0: 32, out_mem_1_write_data: 32, out_mem_1_write_en: 1) { - cells { - min_depth_4 = std_reg(32); - lt_depth_4 = std_lt(32); - depth_plus_4 = std_add(32); - min_depth_4_plus_4 = std_add(32); - depth_plus_8 = std_add(32); - min_depth_4_plus_5 = std_add(32); - min_depth_4_plus_2 = std_add(32); - depth_plus_5 = std_add(32); - depth_plus_7 = std_add(32); - depth_plus_0 = std_add(32); - depth_plus_9 = std_add(32); - depth_plus_1 = std_add(32); - min_depth_4_plus_1 = std_add(32); - depth_plus_6 = std_add(32); - depth_plus_3 = std_add(32); - min_depth_4_plus_3 = std_add(32); - depth_plus_2 = std_add(32); - pe_0_0 = mac_pe(); - top_0_0 = std_reg(32); - left_0_0 = std_reg(32); - pe_0_1 = mac_pe(); - top_0_1 = std_reg(32); - left_0_1 = std_reg(32); - pe_0_2 = mac_pe(); - top_0_2 = std_reg(32); - left_0_2 = std_reg(32); - pe_0_3 = mac_pe(); - top_0_3 = std_reg(32); - left_0_3 = std_reg(32); - pe_1_0 = mac_pe(); - top_1_0 = std_reg(32); - left_1_0 = std_reg(32); - pe_1_1 = mac_pe(); - top_1_1 = std_reg(32); - left_1_1 = std_reg(32); - pe_1_2 = mac_pe(); - top_1_2 = std_reg(32); - left_1_2 = std_reg(32); - pe_1_3 = mac_pe(); - top_1_3 = std_reg(32); - left_1_3 = std_reg(32); - t0_idx = std_reg(2); - t0_add = std_add(2); - t1_idx = std_reg(2); - t1_add = std_add(2); - t2_idx = std_reg(2); - t2_add = std_add(2); - t3_idx = std_reg(2); - t3_add = std_add(2); - l0_idx = std_reg(2); - l0_add = std_add(2); - l1_idx = std_reg(2); - l1_add = std_add(2); - idx = std_reg(32); - idx_add = std_add(32); - cond_reg = std_reg(1); - idx_between_4_depth_plus_4_reg = std_reg(1); - index_lt_depth_plus_4 = std_lt(32); - index_ge_4 = std_ge(32); - idx_between_4_depth_plus_4_comb = std_and(1); - idx_between_4_min_depth_4_plus_4_reg = std_reg(1); - index_lt_min_depth_4_plus_4 = std_lt(32); - idx_between_4_min_depth_4_plus_4_comb = std_and(1); - idx_between_8_depth_plus_8_reg = std_reg(1); - index_lt_depth_plus_8 = std_lt(32); - index_ge_8 = std_ge(32); - idx_between_8_depth_plus_8_comb = std_and(1); - idx_between_5_min_depth_4_plus_5_reg = std_reg(1); - index_lt_min_depth_4_plus_5 = std_lt(32); - index_ge_5 = std_ge(32); - idx_between_5_min_depth_4_plus_5_comb = std_and(1); - idx_between_2_min_depth_4_plus_2_reg = std_reg(1); - index_lt_min_depth_4_plus_2 = std_lt(32); - index_ge_2 = std_ge(32); - idx_between_2_min_depth_4_plus_2_comb = std_and(1); - idx_between_5_depth_plus_5_reg = std_reg(1); - index_lt_depth_plus_5 = std_lt(32); - idx_between_5_depth_plus_5_comb = std_and(1); - idx_between_7_depth_plus_7_reg = std_reg(1); - index_lt_depth_plus_7 = std_lt(32); - index_ge_7 = std_ge(32); - idx_between_7_depth_plus_7_comb = std_and(1); - idx_between_0_depth_plus_0_reg = std_reg(1); - index_lt_depth_plus_0 = std_lt(32); - idx_between_9_depth_plus_9_reg = std_reg(1); - index_lt_depth_plus_9 = std_lt(32); - index_ge_9 = std_ge(32); - idx_between_9_depth_plus_9_comb = std_and(1); - idx_between_depth_plus_5_None_reg = std_reg(1); - index_ge_depth_plus_5 = std_ge(32); - idx_between_1_depth_plus_1_reg = std_reg(1); - index_lt_depth_plus_1 = std_lt(32); - index_ge_1 = std_ge(32); - idx_between_1_depth_plus_1_comb = std_and(1); - idx_between_1_min_depth_4_plus_1_reg = std_reg(1); - index_lt_min_depth_4_plus_1 = std_lt(32); - idx_between_1_min_depth_4_plus_1_comb = std_and(1); - idx_between_depth_plus_6_None_reg = std_reg(1); - index_ge_depth_plus_6 = std_ge(32); - idx_between_3_depth_plus_3_reg = std_reg(1); - index_lt_depth_plus_3 = std_lt(32); - index_ge_3 = std_ge(32); - idx_between_3_depth_plus_3_comb = std_and(1); - idx_between_3_min_depth_4_plus_3_reg = std_reg(1); - index_lt_min_depth_4_plus_3 = std_lt(32); - idx_between_3_min_depth_4_plus_3_comb = std_and(1); - idx_between_2_depth_plus_2_reg = std_reg(1); - index_lt_depth_plus_2 = std_lt(32); - idx_between_2_depth_plus_2_comb = std_and(1); - idx_between_6_depth_plus_6_reg = std_reg(1); - index_lt_depth_plus_6 = std_lt(32); - index_ge_6 = std_ge(32); - idx_between_6_depth_plus_6_comb = std_and(1); - relu_r0_cur_val = std_wire(32); - relu_r0_cur_idx = std_reg(32); - leaky_relu_r0 = leaky_relu(); - relu_finished_wire_r0 = std_wire(1); - relu_r1_cur_val = std_wire(32); - relu_r1_cur_idx = std_reg(32); - leaky_relu_r1 = leaky_relu(); - relu_finished_wire_r1 = std_wire(1); - cond_wire = std_wire(1); - } - wires { - static<1> group init_min_depth { - lt_depth_4.left = depth; - lt_depth_4.right = 32'd4; - min_depth_4.in = lt_depth_4.out ? depth; - min_depth_4.in = !lt_depth_4.out ? 32'd4; - min_depth_4.write_en = 1'd1; - } - static<1> group depth_plus_4_group { - depth_plus_4.left = depth; - depth_plus_4.right = 32'd4; - } - static<1> group min_depth_4_plus_4_group { - min_depth_4_plus_4.left = min_depth_4.out; - min_depth_4_plus_4.right = 32'd4; - } - static<1> group depth_plus_8_group { - depth_plus_8.left = depth; - depth_plus_8.right = 32'd8; - } - static<1> group min_depth_4_plus_5_group { - min_depth_4_plus_5.left = min_depth_4.out; - min_depth_4_plus_5.right = 32'd5; - } - static<1> group min_depth_4_plus_2_group { - min_depth_4_plus_2.left = min_depth_4.out; - min_depth_4_plus_2.right = 32'd2; - } - static<1> group depth_plus_5_group { - depth_plus_5.left = depth; - depth_plus_5.right = 32'd5; - } - static<1> group depth_plus_7_group { - depth_plus_7.left = depth; - depth_plus_7.right = 32'd7; - } - static<1> group depth_plus_0_group { - depth_plus_0.left = depth; - depth_plus_0.right = 32'd0; - } - static<1> group depth_plus_9_group { - depth_plus_9.left = depth; - depth_plus_9.right = 32'd9; - } - static<1> group depth_plus_1_group { - depth_plus_1.left = depth; - depth_plus_1.right = 32'd1; - } - static<1> group min_depth_4_plus_1_group { - min_depth_4_plus_1.left = min_depth_4.out; - min_depth_4_plus_1.right = 32'd1; - } - static<1> group depth_plus_6_group { - depth_plus_6.left = depth; - depth_plus_6.right = 32'd6; - } - static<1> group depth_plus_3_group { - depth_plus_3.left = depth; - depth_plus_3.right = 32'd3; - } - static<1> group min_depth_4_plus_3_group { - min_depth_4_plus_3.left = min_depth_4.out; - min_depth_4_plus_3.right = 32'd3; - } - static<1> group depth_plus_2_group { - depth_plus_2.left = depth; - depth_plus_2.right = 32'd2; - } - static<1> group t0_idx_init { - t0_idx.in = 2'd0; - t0_idx.write_en = 1'd1; - } - static<1> group t0_idx_update { - t0_add.left = 2'd1; - t0_add.right = t0_idx.out; - t0_idx.in = t0_add.out; - t0_idx.write_en = 1'd1; - } - static<1> group t0_move { - t0_addr0 = t0_idx.out; - top_0_0.in = t0_read_data; - top_0_0.write_en = 1'd1; - } - static<1> group t1_idx_init { - t1_idx.in = 2'd0; - t1_idx.write_en = 1'd1; - } - static<1> group t1_idx_update { - t1_add.left = 2'd1; - t1_add.right = t1_idx.out; - t1_idx.in = t1_add.out; - t1_idx.write_en = 1'd1; - } - static<1> group t1_move { - t1_addr0 = t1_idx.out; - top_0_1.in = t1_read_data; - top_0_1.write_en = 1'd1; - } - static<1> group t2_idx_init { - t2_idx.in = 2'd0; - t2_idx.write_en = 1'd1; - } - static<1> group t2_idx_update { - t2_add.left = 2'd1; - t2_add.right = t2_idx.out; - t2_idx.in = t2_add.out; - t2_idx.write_en = 1'd1; - } - static<1> group t2_move { - t2_addr0 = t2_idx.out; - top_0_2.in = t2_read_data; - top_0_2.write_en = 1'd1; - } - static<1> group t3_idx_init { - t3_idx.in = 2'd0; - t3_idx.write_en = 1'd1; - } - static<1> group t3_idx_update { - t3_add.left = 2'd1; - t3_add.right = t3_idx.out; - t3_idx.in = t3_add.out; - t3_idx.write_en = 1'd1; - } - static<1> group t3_move { - t3_addr0 = t3_idx.out; - top_0_3.in = t3_read_data; - top_0_3.write_en = 1'd1; - } - static<1> group l0_idx_init { - l0_idx.in = 2'd0; - l0_idx.write_en = 1'd1; - } - static<1> group l0_idx_update { - l0_add.left = 2'd1; - l0_add.right = l0_idx.out; - l0_idx.in = l0_add.out; - l0_idx.write_en = 1'd1; - } - static<1> group l0_move { - l0_addr0 = l0_idx.out; - left_0_0.in = l0_read_data; - left_0_0.write_en = 1'd1; - } - static<1> group l1_idx_init { - l1_idx.in = 2'd0; - l1_idx.write_en = 1'd1; - } - static<1> group l1_idx_update { - l1_add.left = 2'd1; - l1_add.right = l1_idx.out; - l1_idx.in = l1_add.out; - l1_idx.write_en = 1'd1; - } - static<1> group l1_move { - l1_addr0 = l1_idx.out; - left_1_0.in = l1_read_data; - left_1_0.write_en = 1'd1; - } - static<1> group pe_0_0_right_move { - left_0_1.in = left_0_0.out; - left_0_1.write_en = 1'd1; - } - static<1> group pe_0_0_down_move { - top_1_0.in = top_0_0.out; - top_1_0.write_en = 1'd1; - } - static<1> group pe_0_1_right_move { - left_0_2.in = left_0_1.out; - left_0_2.write_en = 1'd1; - } - static<1> group pe_0_1_down_move { - top_1_1.in = top_0_1.out; - top_1_1.write_en = 1'd1; - } - static<1> group pe_0_2_right_move { - left_0_3.in = left_0_2.out; - left_0_3.write_en = 1'd1; - } - static<1> group pe_0_2_down_move { - top_1_2.in = top_0_2.out; - top_1_2.write_en = 1'd1; - } - static<1> group pe_0_3_down_move { - top_1_3.in = top_0_3.out; - top_1_3.write_en = 1'd1; - } - static<1> group pe_1_0_right_move { - left_1_1.in = left_1_0.out; - left_1_1.write_en = 1'd1; - } - static<1> group pe_1_1_right_move { - left_1_2.in = left_1_1.out; - left_1_2.write_en = 1'd1; - } - static<1> group pe_1_2_right_move { - left_1_3.in = left_1_2.out; - left_1_3.write_en = 1'd1; - } - static<1> group init_idx { - idx.in = 32'd0; - idx.write_en = 1'd1; - } - static<1> group incr_idx { - idx_add.left = idx.out; - idx_add.right = 32'd1; - idx.in = idx_add.out; - idx.write_en = 1'd1; - } - static<1> group init_cond_reg { - cond_reg.in = 1'd1; - cond_reg.write_en = 1'd1; - } - static<1> group idx_between_4_depth_plus_4_group { - index_ge_4.left = idx_add.out; - index_ge_4.right = 32'd4; - index_lt_depth_plus_4.left = idx_add.out; - index_lt_depth_plus_4.right = depth_plus_4.out; - idx_between_4_depth_plus_4_comb.left = index_ge_4.out; - idx_between_4_depth_plus_4_comb.right = index_lt_depth_plus_4.out; - idx_between_4_depth_plus_4_reg.in = idx_between_4_depth_plus_4_comb.out; - idx_between_4_depth_plus_4_reg.write_en = 1'd1; - } - static<1> group init_idx_between_4_depth_plus_4 { - idx_between_4_depth_plus_4_reg.in = 1'd0; - idx_between_4_depth_plus_4_reg.write_en = 1'd1; - } - static<1> group idx_between_4_min_depth_4_plus_4_group { - index_ge_4.left = idx_add.out; - index_ge_4.right = 32'd4; - index_lt_min_depth_4_plus_4.left = idx_add.out; - index_lt_min_depth_4_plus_4.right = min_depth_4_plus_4.out; - idx_between_4_min_depth_4_plus_4_comb.left = index_ge_4.out; - idx_between_4_min_depth_4_plus_4_comb.right = index_lt_min_depth_4_plus_4.out; - idx_between_4_min_depth_4_plus_4_reg.in = idx_between_4_min_depth_4_plus_4_comb.out; - idx_between_4_min_depth_4_plus_4_reg.write_en = 1'd1; - } - static<1> group init_idx_between_4_min_depth_4_plus_4 { - idx_between_4_min_depth_4_plus_4_reg.in = 1'd0; - idx_between_4_min_depth_4_plus_4_reg.write_en = 1'd1; - } - static<1> group idx_between_8_depth_plus_8_group { - index_ge_8.left = idx_add.out; - index_ge_8.right = 32'd8; - index_lt_depth_plus_8.left = idx_add.out; - index_lt_depth_plus_8.right = depth_plus_8.out; - idx_between_8_depth_plus_8_comb.left = index_ge_8.out; - idx_between_8_depth_plus_8_comb.right = index_lt_depth_plus_8.out; - idx_between_8_depth_plus_8_reg.in = idx_between_8_depth_plus_8_comb.out; - idx_between_8_depth_plus_8_reg.write_en = 1'd1; - } - static<1> group init_idx_between_8_depth_plus_8 { - idx_between_8_depth_plus_8_reg.in = 1'd0; - idx_between_8_depth_plus_8_reg.write_en = 1'd1; - } - static<1> group idx_between_5_min_depth_4_plus_5_group { - index_ge_5.left = idx_add.out; - index_ge_5.right = 32'd5; - index_lt_min_depth_4_plus_5.left = idx_add.out; - index_lt_min_depth_4_plus_5.right = min_depth_4_plus_5.out; - idx_between_5_min_depth_4_plus_5_comb.left = index_ge_5.out; - idx_between_5_min_depth_4_plus_5_comb.right = index_lt_min_depth_4_plus_5.out; - idx_between_5_min_depth_4_plus_5_reg.in = idx_between_5_min_depth_4_plus_5_comb.out; - idx_between_5_min_depth_4_plus_5_reg.write_en = 1'd1; - } - static<1> group init_idx_between_5_min_depth_4_plus_5 { - idx_between_5_min_depth_4_plus_5_reg.in = 1'd0; - idx_between_5_min_depth_4_plus_5_reg.write_en = 1'd1; - } - static<1> group idx_between_2_min_depth_4_plus_2_group { - index_ge_2.left = idx_add.out; - index_ge_2.right = 32'd2; - index_lt_min_depth_4_plus_2.left = idx_add.out; - index_lt_min_depth_4_plus_2.right = min_depth_4_plus_2.out; - idx_between_2_min_depth_4_plus_2_comb.left = index_ge_2.out; - idx_between_2_min_depth_4_plus_2_comb.right = index_lt_min_depth_4_plus_2.out; - idx_between_2_min_depth_4_plus_2_reg.in = idx_between_2_min_depth_4_plus_2_comb.out; - idx_between_2_min_depth_4_plus_2_reg.write_en = 1'd1; - } - static<1> group init_idx_between_2_min_depth_4_plus_2 { - idx_between_2_min_depth_4_plus_2_reg.in = 1'd0; - idx_between_2_min_depth_4_plus_2_reg.write_en = 1'd1; - } - static<1> group idx_between_5_depth_plus_5_group { - index_ge_5.left = idx_add.out; - index_ge_5.right = 32'd5; - index_lt_depth_plus_5.left = idx_add.out; - index_lt_depth_plus_5.right = depth_plus_5.out; - idx_between_5_depth_plus_5_comb.left = index_ge_5.out; - idx_between_5_depth_plus_5_comb.right = index_lt_depth_plus_5.out; - idx_between_5_depth_plus_5_reg.in = idx_between_5_depth_plus_5_comb.out; - idx_between_5_depth_plus_5_reg.write_en = 1'd1; - } - static<1> group init_idx_between_5_depth_plus_5 { - idx_between_5_depth_plus_5_reg.in = 1'd0; - idx_between_5_depth_plus_5_reg.write_en = 1'd1; - } - static<1> group idx_between_7_depth_plus_7_group { - index_ge_7.left = idx_add.out; - index_ge_7.right = 32'd7; - index_lt_depth_plus_7.left = idx_add.out; - index_lt_depth_plus_7.right = depth_plus_7.out; - idx_between_7_depth_plus_7_comb.left = index_ge_7.out; - idx_between_7_depth_plus_7_comb.right = index_lt_depth_plus_7.out; - idx_between_7_depth_plus_7_reg.in = idx_between_7_depth_plus_7_comb.out; - idx_between_7_depth_plus_7_reg.write_en = 1'd1; - } - static<1> group init_idx_between_7_depth_plus_7 { - idx_between_7_depth_plus_7_reg.in = 1'd0; - idx_between_7_depth_plus_7_reg.write_en = 1'd1; - } - static<1> group idx_between_0_depth_plus_0_group { - index_lt_depth_plus_0.left = idx_add.out; - index_lt_depth_plus_0.right = depth_plus_0.out; - idx_between_0_depth_plus_0_reg.in = index_lt_depth_plus_0.out; - idx_between_0_depth_plus_0_reg.write_en = 1'd1; - } - static<1> group init_idx_between_0_depth_plus_0 { - idx_between_0_depth_plus_0_reg.in = 1'd1; - idx_between_0_depth_plus_0_reg.write_en = 1'd1; - } - static<1> group idx_between_9_depth_plus_9_group { - index_ge_9.left = idx_add.out; - index_ge_9.right = 32'd9; - index_lt_depth_plus_9.left = idx_add.out; - index_lt_depth_plus_9.right = depth_plus_9.out; - idx_between_9_depth_plus_9_comb.left = index_ge_9.out; - idx_between_9_depth_plus_9_comb.right = index_lt_depth_plus_9.out; - idx_between_9_depth_plus_9_reg.in = idx_between_9_depth_plus_9_comb.out; - idx_between_9_depth_plus_9_reg.write_en = 1'd1; - } - static<1> group init_idx_between_9_depth_plus_9 { - idx_between_9_depth_plus_9_reg.in = 1'd0; - idx_between_9_depth_plus_9_reg.write_en = 1'd1; - } - static<1> group idx_between_depth_plus_5_None_group { - index_ge_depth_plus_5.left = idx_add.out; - index_ge_depth_plus_5.right = depth_plus_5.out; - idx_between_depth_plus_5_None_reg.in = index_ge_depth_plus_5.out; - idx_between_depth_plus_5_None_reg.write_en = 1'd1; - } - static<1> group init_idx_between_depth_plus_5_None { - idx_between_depth_plus_5_None_reg.in = 1'd0; - idx_between_depth_plus_5_None_reg.write_en = 1'd1; - } - static<1> group idx_between_1_depth_plus_1_group { - index_ge_1.left = idx_add.out; - index_ge_1.right = 32'd1; - index_lt_depth_plus_1.left = idx_add.out; - index_lt_depth_plus_1.right = depth_plus_1.out; - idx_between_1_depth_plus_1_comb.left = index_ge_1.out; - idx_between_1_depth_plus_1_comb.right = index_lt_depth_plus_1.out; - idx_between_1_depth_plus_1_reg.in = idx_between_1_depth_plus_1_comb.out; - idx_between_1_depth_plus_1_reg.write_en = 1'd1; - } - static<1> group init_idx_between_1_depth_plus_1 { - idx_between_1_depth_plus_1_reg.in = 1'd0; - idx_between_1_depth_plus_1_reg.write_en = 1'd1; - } - static<1> group idx_between_1_min_depth_4_plus_1_group { - index_ge_1.left = idx_add.out; - index_ge_1.right = 32'd1; - index_lt_min_depth_4_plus_1.left = idx_add.out; - index_lt_min_depth_4_plus_1.right = min_depth_4_plus_1.out; - idx_between_1_min_depth_4_plus_1_comb.left = index_ge_1.out; - idx_between_1_min_depth_4_plus_1_comb.right = index_lt_min_depth_4_plus_1.out; - idx_between_1_min_depth_4_plus_1_reg.in = idx_between_1_min_depth_4_plus_1_comb.out; - idx_between_1_min_depth_4_plus_1_reg.write_en = 1'd1; - } - static<1> group init_idx_between_1_min_depth_4_plus_1 { - idx_between_1_min_depth_4_plus_1_reg.in = 1'd0; - idx_between_1_min_depth_4_plus_1_reg.write_en = 1'd1; - } - static<1> group idx_between_depth_plus_6_None_group { - index_ge_depth_plus_6.left = idx_add.out; - index_ge_depth_plus_6.right = depth_plus_6.out; - idx_between_depth_plus_6_None_reg.in = index_ge_depth_plus_6.out; - idx_between_depth_plus_6_None_reg.write_en = 1'd1; - } - static<1> group init_idx_between_depth_plus_6_None { - idx_between_depth_plus_6_None_reg.in = 1'd0; - idx_between_depth_plus_6_None_reg.write_en = 1'd1; - } - static<1> group idx_between_3_depth_plus_3_group { - index_ge_3.left = idx_add.out; - index_ge_3.right = 32'd3; - index_lt_depth_plus_3.left = idx_add.out; - index_lt_depth_plus_3.right = depth_plus_3.out; - idx_between_3_depth_plus_3_comb.left = index_ge_3.out; - idx_between_3_depth_plus_3_comb.right = index_lt_depth_plus_3.out; - idx_between_3_depth_plus_3_reg.in = idx_between_3_depth_plus_3_comb.out; - idx_between_3_depth_plus_3_reg.write_en = 1'd1; - } - static<1> group init_idx_between_3_depth_plus_3 { - idx_between_3_depth_plus_3_reg.in = 1'd0; - idx_between_3_depth_plus_3_reg.write_en = 1'd1; - } - static<1> group idx_between_3_min_depth_4_plus_3_group { - index_ge_3.left = idx_add.out; - index_ge_3.right = 32'd3; - index_lt_min_depth_4_plus_3.left = idx_add.out; - index_lt_min_depth_4_plus_3.right = min_depth_4_plus_3.out; - idx_between_3_min_depth_4_plus_3_comb.left = index_ge_3.out; - idx_between_3_min_depth_4_plus_3_comb.right = index_lt_min_depth_4_plus_3.out; - idx_between_3_min_depth_4_plus_3_reg.in = idx_between_3_min_depth_4_plus_3_comb.out; - idx_between_3_min_depth_4_plus_3_reg.write_en = 1'd1; - } - static<1> group init_idx_between_3_min_depth_4_plus_3 { - idx_between_3_min_depth_4_plus_3_reg.in = 1'd0; - idx_between_3_min_depth_4_plus_3_reg.write_en = 1'd1; - } - static<1> group idx_between_2_depth_plus_2_group { - index_ge_2.left = idx_add.out; - index_ge_2.right = 32'd2; - index_lt_depth_plus_2.left = idx_add.out; - index_lt_depth_plus_2.right = depth_plus_2.out; - idx_between_2_depth_plus_2_comb.left = index_ge_2.out; - idx_between_2_depth_plus_2_comb.right = index_lt_depth_plus_2.out; - idx_between_2_depth_plus_2_reg.in = idx_between_2_depth_plus_2_comb.out; - idx_between_2_depth_plus_2_reg.write_en = 1'd1; - } - static<1> group init_idx_between_2_depth_plus_2 { - idx_between_2_depth_plus_2_reg.in = 1'd0; - idx_between_2_depth_plus_2_reg.write_en = 1'd1; - } - static<1> group idx_between_6_depth_plus_6_group { - index_ge_6.left = idx_add.out; - index_ge_6.right = 32'd6; - index_lt_depth_plus_6.left = idx_add.out; - index_lt_depth_plus_6.right = depth_plus_6.out; - idx_between_6_depth_plus_6_comb.left = index_ge_6.out; - idx_between_6_depth_plus_6_comb.right = index_lt_depth_plus_6.out; - idx_between_6_depth_plus_6_reg.in = idx_between_6_depth_plus_6_comb.out; - idx_between_6_depth_plus_6_reg.write_en = 1'd1; - } - static<1> group init_idx_between_6_depth_plus_6 { - idx_between_6_depth_plus_6_reg.in = 1'd0; - idx_between_6_depth_plus_6_reg.write_en = 1'd1; - } - static<1> group relu_r0_helper { - relu_r0_cur_val.in = relu_r0_cur_idx.out == 32'd0 ? pe_0_0.out; - relu_r0_cur_val.in = relu_r0_cur_idx.out == 32'd1 ? pe_0_1.out; - relu_r0_cur_val.in = relu_r0_cur_idx.out == 32'd2 ? pe_0_2.out; - relu_r0_cur_val.in = relu_r0_cur_idx.out == 32'd3 ? pe_0_3.out; - } - static<1> group execute_relu_r0 { - leaky_relu_r0.go = !relu_finished_wire_r0.out ? 1'd1; - leaky_relu_r0.value = relu_r0_cur_val.out; - leaky_relu_r0.index = relu_r0_cur_idx.out; - leaky_relu_r0.out_mem_done = out_mem_0_done; - leaky_relu_r0.idx_reg_out = relu_r0_cur_idx.out; - out_mem_0_addr0 = leaky_relu_r0.out_mem_addr0; - out_mem_0_write_data = leaky_relu_r0.out_mem_write_data; - out_mem_0_write_en = leaky_relu_r0.out_mem_write_en; - relu_r0_cur_idx.write_en = leaky_relu_r0.idx_reg_write_en; - relu_r0_cur_idx.in = leaky_relu_r0.idx_reg_in; - relu_finished_wire_r0.in = relu_r0_cur_idx.out == 32'd4 ? 1'd1; - } - static<1> group relu_r1_helper { - relu_r1_cur_val.in = relu_r1_cur_idx.out == 32'd0 ? pe_1_0.out; - relu_r1_cur_val.in = relu_r1_cur_idx.out == 32'd1 ? pe_1_1.out; - relu_r1_cur_val.in = relu_r1_cur_idx.out == 32'd2 ? pe_1_2.out; - relu_r1_cur_val.in = relu_r1_cur_idx.out == 32'd3 ? pe_1_3.out; - } - static<1> group execute_relu_r1 { - leaky_relu_r1.go = !relu_finished_wire_r1.out ? 1'd1; - leaky_relu_r1.value = relu_r1_cur_val.out; - leaky_relu_r1.index = relu_r1_cur_idx.out; - leaky_relu_r1.out_mem_done = out_mem_1_done; - leaky_relu_r1.idx_reg_out = relu_r1_cur_idx.out; - out_mem_1_addr0 = leaky_relu_r1.out_mem_addr0; - out_mem_1_write_data = leaky_relu_r1.out_mem_write_data; - out_mem_1_write_en = leaky_relu_r1.out_mem_write_en; - relu_r1_cur_idx.write_en = leaky_relu_r1.idx_reg_write_en; - relu_r1_cur_idx.in = leaky_relu_r1.idx_reg_in; - relu_finished_wire_r1.in = relu_r1_cur_idx.out == 32'd4 ? 1'd1; - } - static<1> group write_cond_reg { - cond_wire.in = relu_finished_wire_r0.out & relu_finished_wire_r1.out ? 1'd1; - cond_reg.in = !cond_wire.out ? 1'd1; - cond_reg.in = cond_wire.out ? 1'd0; - cond_reg.write_en = 1'd1; - } - } - control { - seq { - static par { - t0_idx_init; - t1_idx_init; - t2_idx_init; - t3_idx_init; - l0_idx_init; - l1_idx_init; - init_idx; - init_min_depth; - init_cond_reg; - init_idx_between_4_depth_plus_4; - init_idx_between_4_min_depth_4_plus_4; - init_idx_between_8_depth_plus_8; - init_idx_between_5_min_depth_4_plus_5; - init_idx_between_2_min_depth_4_plus_2; - init_idx_between_5_depth_plus_5; - init_idx_between_7_depth_plus_7; - init_idx_between_0_depth_plus_0; - init_idx_between_9_depth_plus_9; - init_idx_between_depth_plus_5_None; - init_idx_between_1_depth_plus_1; - init_idx_between_1_min_depth_4_plus_1; - init_idx_between_depth_plus_6_None; - init_idx_between_3_depth_plus_3; - init_idx_between_3_min_depth_4_plus_3; - init_idx_between_2_depth_plus_2; - init_idx_between_6_depth_plus_6; - } - while cond_reg.out { - static par { - static par { - static par { - static if idx_between_0_depth_plus_0_reg.out { - static par { - l0_move; - l0_idx_update; - t0_move; - t0_idx_update; - } - } - static if idx_between_1_min_depth_4_plus_1_reg.out { - static par { - static invoke pe_0_0(top=top_0_0.out, left=left_0_0.out, mul_ready=1'd0)(); - } - } - static if idx_between_1_depth_plus_1_reg.out { - static par { - pe_0_0_down_move; - pe_0_0_right_move; - } - } - static if idx_between_5_depth_plus_5_reg.out { - static par { - static invoke pe_0_0(top=top_0_0.out, left=left_0_0.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_1_depth_plus_1_reg.out { - static par { - t1_move; - t1_idx_update; - } - } - static if idx_between_2_min_depth_4_plus_2_reg.out { - static par { - static invoke pe_0_1(top=top_0_1.out, left=left_0_1.out, mul_ready=1'd0)(); - } - } - static if idx_between_2_depth_plus_2_reg.out { - static par { - pe_0_1_down_move; - pe_0_1_right_move; - } - } - static if idx_between_6_depth_plus_6_reg.out { - static par { - static invoke pe_0_1(top=top_0_1.out, left=left_0_1.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_2_depth_plus_2_reg.out { - static par { - t2_move; - t2_idx_update; - } - } - static if idx_between_3_min_depth_4_plus_3_reg.out { - static par { - static invoke pe_0_2(top=top_0_2.out, left=left_0_2.out, mul_ready=1'd0)(); - } - } - static if idx_between_3_depth_plus_3_reg.out { - static par { - pe_0_2_down_move; - pe_0_2_right_move; - } - } - static if idx_between_7_depth_plus_7_reg.out { - static par { - static invoke pe_0_2(top=top_0_2.out, left=left_0_2.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_3_depth_plus_3_reg.out { - static par { - t3_move; - t3_idx_update; - } - } - static if idx_between_4_min_depth_4_plus_4_reg.out { - static par { - static invoke pe_0_3(top=top_0_3.out, left=left_0_3.out, mul_ready=1'd0)(); - } - } - static if idx_between_4_depth_plus_4_reg.out { - static par { - pe_0_3_down_move; - } - } - static if idx_between_8_depth_plus_8_reg.out { - static par { - static invoke pe_0_3(top=top_0_3.out, left=left_0_3.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_1_depth_plus_1_reg.out { - static par { - l1_move; - l1_idx_update; - } - } - static if idx_between_2_min_depth_4_plus_2_reg.out { - static par { - static invoke pe_1_0(top=top_1_0.out, left=left_1_0.out, mul_ready=1'd0)(); - } - } - static if idx_between_2_depth_plus_2_reg.out { - static par { - pe_1_0_right_move; - } - } - static if idx_between_6_depth_plus_6_reg.out { - static par { - static invoke pe_1_0(top=top_1_0.out, left=left_1_0.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_3_min_depth_4_plus_3_reg.out { - static par { - static invoke pe_1_1(top=top_1_1.out, left=left_1_1.out, mul_ready=1'd0)(); - } - } - static if idx_between_3_depth_plus_3_reg.out { - static par { - pe_1_1_right_move; - } - } - static if idx_between_7_depth_plus_7_reg.out { - static par { - static invoke pe_1_1(top=top_1_1.out, left=left_1_1.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_4_min_depth_4_plus_4_reg.out { - static par { - static invoke pe_1_2(top=top_1_2.out, left=left_1_2.out, mul_ready=1'd0)(); - } - } - static if idx_between_4_depth_plus_4_reg.out { - static par { - pe_1_2_right_move; - } - } - static if idx_between_8_depth_plus_8_reg.out { - static par { - static invoke pe_1_2(top=top_1_2.out, left=left_1_2.out, mul_ready=1'd1)(); - } - } - } - static par { - static if idx_between_5_min_depth_4_plus_5_reg.out { - static par { - static invoke pe_1_3(top=top_1_3.out, left=left_1_3.out, mul_ready=1'd0)(); - } - } - static if idx_between_9_depth_plus_9_reg.out { - static par { - static invoke pe_1_3(top=top_1_3.out, left=left_1_3.out, mul_ready=1'd1)(); - } - } - } - } - static par { - incr_idx; - idx_between_4_depth_plus_4_group; - idx_between_4_min_depth_4_plus_4_group; - idx_between_8_depth_plus_8_group; - idx_between_5_min_depth_4_plus_5_group; - idx_between_2_min_depth_4_plus_2_group; - idx_between_5_depth_plus_5_group; - idx_between_7_depth_plus_7_group; - idx_between_0_depth_plus_0_group; - idx_between_9_depth_plus_9_group; - idx_between_depth_plus_5_None_group; - idx_between_1_depth_plus_1_group; - idx_between_1_min_depth_4_plus_1_group; - idx_between_depth_plus_6_None_group; - idx_between_3_depth_plus_3_group; - idx_between_3_min_depth_4_plus_3_group; - idx_between_2_depth_plus_2_group; - idx_between_6_depth_plus_6_group; - depth_plus_4_group; - min_depth_4_plus_4_group; - depth_plus_8_group; - min_depth_4_plus_5_group; - min_depth_4_plus_2_group; - depth_plus_5_group; - depth_plus_7_group; - depth_plus_0_group; - depth_plus_9_group; - depth_plus_1_group; - min_depth_4_plus_1_group; - depth_plus_6_group; - depth_plus_3_group; - min_depth_4_plus_3_group; - depth_plus_2_group; - } - static par { - write_cond_reg; - static if idx_between_depth_plus_5_None_reg.out { - static par { - execute_relu_r0; - relu_r0_helper; - } - } - static if idx_between_depth_plus_6_None_reg.out { - static par { - execute_relu_r1; - relu_r1_helper; - } - } - } - } - } - } - } -} -component main() -> () { - cells { - systolic_array = systolic_array_comp(); - @external t0 = std_mem_d1(32, 3, 2); - @external t1 = std_mem_d1(32, 3, 2); - @external t2 = std_mem_d1(32, 3, 2); - @external t3 = std_mem_d1(32, 3, 2); - @external l0 = std_mem_d1(32, 3, 2); - @external l1 = std_mem_d1(32, 3, 2); - @external out_mem_0 = std_mem_d1(32, 4, 32); - @external out_mem_1 = std_mem_d1(32, 4, 32); - } - wires { - - } - control { - invoke systolic_array(depth=32'd3, t0_read_data=t0.read_data, t1_read_data=t1.read_data, t2_read_data=t2.read_data, t3_read_data=t3.read_data, l0_read_data=l0.read_data, l1_read_data=l1.read_data, out_mem_0_done=out_mem_0.done, out_mem_1_done=out_mem_1.done)(t0_addr0=t0.addr0, t1_addr0=t1.addr0, t2_addr0=t2.addr0, t3_addr0=t3.addr0, l0_addr0=l0.addr0, l1_addr0=l1.addr0, out_mem_0_addr0=out_mem_0.addr0, out_mem_0_write_data=out_mem_0.write_data, out_mem_0_write_en=out_mem_0.write_en, out_mem_1_addr0=out_mem_1.addr0, out_mem_1_write_data=out_mem_1.write_data, out_mem_1_write_en=out_mem_1.write_en); - } -} -metadata #{ -0: pe_0_0 filling: [1,min_depth_4_plus_1) accumulating: [5 depth_plus_5) -1: pe_0_1 filling: [2,min_depth_4_plus_2) accumulating: [6 depth_plus_6) -2: pe_0_2 filling: [3,min_depth_4_plus_3) accumulating: [7 depth_plus_7) -3: pe_0_3 filling: [4,min_depth_4_plus_4) accumulating: [8 depth_plus_8) -4: pe_1_0 filling: [2,min_depth_4_plus_2) accumulating: [6 depth_plus_6) -5: pe_1_1 filling: [3,min_depth_4_plus_3) accumulating: [7 depth_plus_7) -6: pe_1_2 filling: [4,min_depth_4_plus_4) accumulating: [8 depth_plus_8) -7: pe_1_3 filling: [5,min_depth_4_plus_5) accumulating: [9 depth_plus_9) -}#