diff --git a/frontends/systolic-lang/check-output.py b/frontends/systolic-lang/check-output.py index 4c69d8f6c0..b88efaa4f3 100644 --- a/frontends/systolic-lang/check-output.py +++ b/frontends/systolic-lang/check-output.py @@ -46,9 +46,12 @@ for c in range(tl): top[r][c] = json_data[f"t{c}"][r] - matmul_result = np.matmul(left, top).flatten() + matmul_result = np.matmul(left, top) - json_result = np.array(json_data["out_mem"]) + res = [] + for r in range(ll): + res.append(json_data[f"out_mem_{r}"]) + json_result = np.array(res) if np.array_equal(json_result, matmul_result): print("Correct") diff --git a/frontends/systolic-lang/gen-systolic.py b/frontends/systolic-lang/gen-systolic.py index 27feed3514..e2dc7323b2 100755 --- a/frontends/systolic-lang/gen-systolic.py +++ b/frontends/systolic-lang/gen-systolic.py @@ -170,18 +170,17 @@ def instantiate_output_move(comp: cb.ComponentBuilder, row, col, cols): Generates groups to move the final value from a PE into the output array. """ group_name = NAME_SCHEME["out mem move"].format(pe=f"pe_{row}_{col}") - idx = row * cols + col pe = comp.get_cell(f"pe_{row}_{col}") - out = comp.get_cell(OUT_MEM) + out = comp.get_cell(OUT_MEM + f"_{row}") with comp.static_group(group_name, 1): - out.addr0 = idx + out.addr0 = col out.write_data = pe.out out.write_en = 1 def gen_schedules(top_length, top_depth, left_length, left_depth): """ - Generates 4 arrays that are the same size as the output (systolic) array + Generates 5 arrays that are the same size as the output (systolic) array Each entry in the array has tuple [start, end) that indicates the cycles that they are active `update_sched` contains when to update the indices of the input memories and feed @@ -191,11 +190,14 @@ def gen_schedules(top_length, top_depth, left_length, left_depth): `pe_accum_sched` contains when to invoke PE and accumulate (bc the multipliers are ready with an output) `pe_move_sched` contains when to "move" the PE (i.e., pass data) + `pe_write_sched` contains when to "write" the PE value into memory (i.e., when + the PE is "finished") """ update_sched = np.zeros((left_length, top_length), dtype=object) pe_fill_sched = np.zeros((left_length, top_length), dtype=object) pe_accum_sched = np.zeros((left_length, top_length), dtype=object) pe_move_sched = np.zeros((left_length, top_length), dtype=object) + pe_write_sched = np.zeros((left_length, top_length), dtype=object) for row in range(0, left_length): for col in range(0, top_length): pos = row + col @@ -203,7 +205,8 @@ def gen_schedules(top_length, top_depth, left_length, left_depth): pe_fill_sched[row][col] = (pos + 1, pos + min(4, left_depth) + 1) pe_accum_sched[row][col] = (pos + 5, pos + left_depth + 5) pe_move_sched[row][col] = (pos + 1, pos + left_depth + 1) - return (update_sched, pe_fill_sched, pe_accum_sched, pe_move_sched) + pe_write_sched[row][col] = (pos + left_depth + 5, pos + left_depth + 6) + return (update_sched, pe_fill_sched, pe_accum_sched, pe_move_sched, pe_write_sched) def accum_nec_ranges(nec_ranges, schedule): @@ -377,6 +380,7 @@ def generate_control( fill_sched, accum_sched, move_sched, + write_sched, nec_ranges, ): """ @@ -449,13 +453,20 @@ def counter(): accum_sched[r][c][1], [get_pe_invoke(r, c, top_length, left_length, 1)], ) + pe_writes = execute_if_between( + comp, + write_sched[r][c][0], + write_sched[r][c][1], + [py_ast.Enable(NAME_SCHEME["out mem move"].format(pe=f"pe_{r}_{c}"))], + ) + pe_control = input_mem_updates + pe_fills + pe_moves + pe_accums + pe_writes + control_stmts.append(py_ast.StaticParComp(pe_control)) + # providing metadata tag = counter() source_map[ tag ] = f"pe_{r}_{c} filling: [{fill_sched[r][c][0]},{fill_sched[r][c][1]}) \ accumulating: [{accum_sched[r][c][0]} {accum_sched[r][c][1]})" - pe_control = input_mem_updates + pe_fills + pe_moves + pe_accums - control_stmts.append(py_ast.StaticParComp(pe_control)) for start, end in nec_ranges: # build the control stmts that assign correct values to # idx_between_{start}_{end}_reg, which is what the if stmts above^ rely on @@ -468,20 +479,11 @@ def counter(): # build the static repeat # num repeats = (top_length - 1) + (left_length - 1) + (top_depth - 1) + 5 + 1 static_repeat = cb.static_repeat( - top_length + left_length + top_depth + 3, repeat_body + top_length + left_length + top_depth + 4, repeat_body ) control.append(static_repeat) - # Move all the results into output memory - mover_groups = [] - for row in range(left_length): - for col in range(top_length): - mover_groups.append( - py_ast.Enable(NAME_SCHEME["out mem move"].format(pe=f"pe_{row}_{col}")) - ) - - control.append(py_ast.StaticSeqComp(mover_groups)) return py_ast.StaticSeqComp(stmts=control), source_map @@ -500,7 +502,7 @@ def create_systolic_array( f"{top_length}x{top_depth} and {left_depth}x{left_length}" ) - (update_sched, fill_sched, accum_sched, move_sched) = gen_schedules( + (update_sched, fill_sched, accum_sched, move_sched, write_sched) = gen_schedules( top_length, top_depth, left_length, left_depth ) nec_ranges = set() @@ -508,6 +510,7 @@ def create_systolic_array( accum_nec_ranges(nec_ranges, fill_sched) accum_nec_ranges(nec_ranges, accum_sched) accum_nec_ranges(nec_ranges, move_sched) + accum_nec_ranges(nec_ranges, write_sched) main = prog.component("main") @@ -524,15 +527,15 @@ def create_systolic_array( instantiate_memory(main, "left", col, left_depth) # Instantiate output memory - total_size = left_length * top_length - out_idx_size = bits_needed(total_size) - main.mem_d1( - OUT_MEM, - BITWIDTH, - total_size, - out_idx_size, - is_external=True, - ) + out_idx_size = bits_needed(top_length) + for i in range(left_length): + main.mem_d1( + OUT_MEM + f"_{i}", + BITWIDTH, + top_length, + out_idx_size, + is_external=True, + ) # Instantiate all the PEs for row in range(left_length): @@ -545,7 +548,7 @@ def create_systolic_array( # Instantiate output movement structure instantiate_output_move(main, row, col, top_length) - iter_limit = top_length + left_length + top_depth + 3 + iter_limit = top_length + left_length + top_depth + 4 iter_idx_size = bits_needed(iter_limit) # instantiate groups that initialize idx to 0 and increment it instantiate_idx_groups(main, iter_idx_size, iter_limit) @@ -566,6 +569,7 @@ def create_systolic_array( fill_sched, accum_sched, move_sched, + write_sched, nec_ranges, ) main.control = control diff --git a/tests/correctness/systolic/output/array-2-3-4.expect b/tests/correctness/systolic/output/array-2-3-4.expect index 173ea2b74d..cfe7d6ab98 100644 --- a/tests/correctness/systolic/output/array-2-3-4.expect +++ b/tests/correctness/systolic/output/array-2-3-4.expect @@ -1,5 +1,5 @@ { - "cycles": 21, + "cycles": 14, "memories": { "l0": [ 62, @@ -11,11 +11,13 @@ 28, 61 ], - "out_mem": [ + "out_mem_0": [ 5304, 5634, 8244, - 1030, + 1030 + ], + "out_mem_1": [ 8518, 8879, 11617, diff --git a/tests/correctness/systolic/output/array-2-3-4.systolic.data b/tests/correctness/systolic/output/array-2-3-4.systolic.data index 0892739181..638b8e3506 100644 --- a/tests/correctness/systolic/output/array-2-3-4.systolic.data +++ b/tests/correctness/systolic/output/array-2-3-4.systolic.data @@ -23,12 +23,21 @@ "width": 32 } }, - "out_mem": { + "out_mem_0": { "data": [ 0, 0, 0, - 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_1": { + "data": [ 0, 0, 0, diff --git a/tests/correctness/systolic/output/array-8.expect b/tests/correctness/systolic/output/array-8.expect index e4e82058d5..4cc6701e66 100644 --- a/tests/correctness/systolic/output/array-8.expect +++ b/tests/correctness/systolic/output/array-8.expect @@ -1,5 +1,5 @@ { - "cycles": 92, + "cycles": 29, "memories": { "l0": [ 26, @@ -81,7 +81,7 @@ 70, 91 ], - "out_mem": [ + "out_mem_0": [ 15082, 17066, 25978, @@ -89,7 +89,9 @@ 17367, 27929, 17607, - 13732, + 13732 + ], + "out_mem_1": [ 9378, 10449, 15741, @@ -97,7 +99,9 @@ 12877, 18998, 9314, - 9333, + 9333 + ], + "out_mem_2": [ 15735, 12897, 24104, @@ -105,7 +109,9 @@ 16455, 29104, 17296, - 15490, + 15490 + ], + "out_mem_3": [ 22450, 26165, 32194, @@ -113,7 +119,9 @@ 23784, 33638, 26276, - 24976, + 24976 + ], + "out_mem_4": [ 15650, 19069, 21323, @@ -121,7 +129,9 @@ 19967, 24453, 17448, - 14934, + 14934 + ], + "out_mem_5": [ 18516, 22029, 30577, @@ -129,7 +139,9 @@ 20837, 35265, 21524, - 14972, + 14972 + ], + "out_mem_6": [ 13426, 16673, 19948, @@ -137,7 +149,9 @@ 15650, 23464, 18419, - 10693, + 10693 + ], + "out_mem_7": [ 15791, 22708, 22926, diff --git a/tests/correctness/systolic/output/array-8.systolic.data b/tests/correctness/systolic/output/array-8.systolic.data index df6de03c03..c490faa8cc 100644 --- a/tests/correctness/systolic/output/array-8.systolic.data +++ b/tests/correctness/systolic/output/array-8.systolic.data @@ -135,7 +135,7 @@ "width": 32 } }, - "out_mem": { + "out_mem_0": { "data": [ 0, 0, @@ -144,6 +144,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_1": { + "data": [ 0, 0, 0, @@ -151,6 +161,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_2": { + "data": [ 0, 0, 0, @@ -158,6 +178,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_3": { + "data": [ 0, 0, 0, @@ -165,6 +195,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_4": { + "data": [ 0, 0, 0, @@ -172,6 +212,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_5": { + "data": [ 0, 0, 0, @@ -179,6 +229,16 @@ 0, 0, 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_6": { + "data": [ 0, 0, 0, @@ -186,13 +246,16 @@ 0, 0, 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + 0 + ], + "format": { + "is_signed": false, + "numeric_type": "bitnum", + "width": 32 + } + }, + "out_mem_7": { + "data": [ 0, 0, 0, diff --git a/tests/correctness/systolic/pe/array-2.expect b/tests/correctness/systolic/pe/array-2.expect index aa76585094..c947279742 100644 --- a/tests/correctness/systolic/pe/array-2.expect +++ b/tests/correctness/systolic/pe/array-2.expect @@ -1,5 +1,5 @@ { - "cycles": 19, + "cycles": 16, "pe_00": [ 0, 120, diff --git a/tests/correctness/systolic/pe/array-3.expect b/tests/correctness/systolic/pe/array-3.expect index bd0ac30e65..25ea25a921 100644 --- a/tests/correctness/systolic/pe/array-3.expect +++ b/tests/correctness/systolic/pe/array-3.expect @@ -1,5 +1,5 @@ { - "cycles": 26, + "cycles": 18, "pe_00": [ 0, 120, diff --git a/tests/frontend/systolic/array-1.expect b/tests/frontend/systolic/array-1.expect index ec5f098e2e..d1ed422c83 100644 --- a/tests/frontend/systolic/array-1.expect +++ b/tests/frontend/systolic/array-1.expect @@ -38,9 +38,13 @@ component main() -> () { @external l0 = std_mem_d1(32, 3, 2); l0_idx = std_reg(2); l0_add = std_add(2); - @external out_mem = std_mem_d1(32, 1, 1); + @external out_mem_0 = std_mem_d1(32, 1, 1); idx = std_reg(4); idx_add = std_add(4); + idx_between_8_9_reg = std_reg(1); + index_lt_9 = std_lt(4); + index_ge_8 = std_ge(4); + idx_between_8_9_comb = std_and(1); idx_between_0_3_reg = std_reg(1); index_lt_3 = std_lt(4); idx_between_1_4_reg = std_reg(1); @@ -84,9 +88,9 @@ component main() -> () { left_0_0.write_en = 1'd1; } static<1> group pe_0_0_out_write { - out_mem.addr0 = 1'd0; - out_mem.write_data = pe_0_0.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 1'd0; + out_mem_0.write_data = pe_0_0.out; + out_mem_0.write_en = 1'd1; } static<1> group init_idx { idx.in = 4'd0; @@ -98,6 +102,20 @@ component main() -> () { idx.in = idx_add.out; idx.write_en = 1'd1; } + static<1> group idx_between_8_9_group { + index_ge_8.left = idx_add.out; + index_ge_8.right = 4'd8; + index_lt_9.left = idx_add.out; + index_lt_9.right = 4'd9; + idx_between_8_9_comb.left = index_ge_8.out; + idx_between_8_9_comb.right = index_lt_9.out; + idx_between_8_9_reg.in = idx_between_8_9_comb.out; + idx_between_8_9_reg.write_en = 1'd1; + } + static<1> group init_idx_between_8_9 { + idx_between_8_9_reg.in = 1'd0; + idx_between_8_9_reg.write_en = 1'd1; + } static<1> group idx_between_0_3_group { index_lt_3.left = idx_add.out; index_lt_3.right = 4'd3; @@ -143,11 +161,12 @@ component main() -> () { t0_idx_init; l0_idx_init; init_idx; + init_idx_between_8_9; init_idx_between_0_3; init_idx_between_1_4; init_idx_between_5_8; } - static repeat 8 { + static repeat 9 { static par { static par { static par { @@ -169,19 +188,22 @@ component main() -> () { static invoke pe_0_0(top=top_0_0.out, left=left_0_0.out, mul_ready=1'd1)(); } } + static if idx_between_8_9_reg.out { + static par { + pe_0_0_out_write; + } + } } } static par { incr_idx; + idx_between_8_9_group; idx_between_0_3_group; idx_between_1_4_group; idx_between_5_8_group; } } } - static seq { - pe_0_0_out_write; - } } } } diff --git a/tests/frontend/systolic/array-2.expect b/tests/frontend/systolic/array-2.expect index df7340b9cc..60218356a4 100644 --- a/tests/frontend/systolic/array-2.expect +++ b/tests/frontend/systolic/array-2.expect @@ -53,15 +53,23 @@ component main() -> () { @external l1 = std_mem_d1(32, 3, 2); l1_idx = std_reg(2); l1_add = std_add(2); - @external out_mem = std_mem_d1(32, 4, 3); + @external out_mem_0 = std_mem_d1(32, 2, 2); + @external out_mem_1 = std_mem_d1(32, 2, 2); idx = std_reg(4); idx_add = std_add(4); + idx_between_9_10_reg = std_reg(1); + index_lt_10 = std_lt(4); + index_ge_9 = std_ge(4); + idx_between_9_10_comb = std_and(1); + idx_between_10_11_reg = std_reg(1); + index_lt_11 = std_lt(4); + index_ge_10 = std_ge(4); + idx_between_10_11_comb = std_and(1); idx_between_5_8_reg = std_reg(1); index_lt_8 = std_lt(4); index_ge_5 = std_ge(4); idx_between_5_8_comb = std_and(1); idx_between_7_10_reg = std_reg(1); - index_lt_10 = std_lt(4); index_ge_7 = std_ge(4); idx_between_7_10_comb = std_and(1); idx_between_0_3_reg = std_reg(1); @@ -70,6 +78,10 @@ component main() -> () { index_lt_4 = std_lt(4); index_ge_1 = std_ge(4); idx_between_1_4_comb = std_and(1); + idx_between_8_9_reg = std_reg(1); + index_lt_9 = std_lt(4); + index_ge_8 = std_ge(4); + idx_between_8_9_comb = std_and(1); idx_between_3_6_reg = std_reg(1); index_lt_6 = std_lt(4); index_ge_3 = std_ge(4); @@ -79,7 +91,6 @@ component main() -> () { index_ge_2 = std_ge(4); idx_between_2_5_comb = std_and(1); idx_between_6_9_reg = std_reg(1); - index_lt_9 = std_lt(4); index_ge_6 = std_ge(4); idx_between_6_9_comb = std_and(1); } @@ -153,32 +164,32 @@ component main() -> () { top_1_0.write_en = 1'd1; } static<1> group pe_0_0_out_write { - out_mem.addr0 = 3'd0; - out_mem.write_data = pe_0_0.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 2'd0; + out_mem_0.write_data = pe_0_0.out; + out_mem_0.write_en = 1'd1; } static<1> group pe_0_1_down_move { top_1_1.in = top_0_1.out; top_1_1.write_en = 1'd1; } static<1> group pe_0_1_out_write { - out_mem.addr0 = 3'd1; - out_mem.write_data = pe_0_1.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 2'd1; + out_mem_0.write_data = pe_0_1.out; + out_mem_0.write_en = 1'd1; } static<1> group pe_1_0_right_move { left_1_1.in = left_1_0.out; left_1_1.write_en = 1'd1; } static<1> group pe_1_0_out_write { - out_mem.addr0 = 3'd2; - out_mem.write_data = pe_1_0.out; - out_mem.write_en = 1'd1; + out_mem_1.addr0 = 2'd0; + out_mem_1.write_data = pe_1_0.out; + out_mem_1.write_en = 1'd1; } static<1> group pe_1_1_out_write { - out_mem.addr0 = 3'd3; - out_mem.write_data = pe_1_1.out; - out_mem.write_en = 1'd1; + out_mem_1.addr0 = 2'd1; + out_mem_1.write_data = pe_1_1.out; + out_mem_1.write_en = 1'd1; } static<1> group init_idx { idx.in = 4'd0; @@ -190,6 +201,34 @@ component main() -> () { idx.in = idx_add.out; idx.write_en = 1'd1; } + static<1> group idx_between_9_10_group { + index_ge_9.left = idx_add.out; + index_ge_9.right = 4'd9; + index_lt_10.left = idx_add.out; + index_lt_10.right = 4'd10; + idx_between_9_10_comb.left = index_ge_9.out; + idx_between_9_10_comb.right = index_lt_10.out; + idx_between_9_10_reg.in = idx_between_9_10_comb.out; + idx_between_9_10_reg.write_en = 1'd1; + } + static<1> group init_idx_between_9_10 { + idx_between_9_10_reg.in = 1'd0; + idx_between_9_10_reg.write_en = 1'd1; + } + static<1> group idx_between_10_11_group { + index_ge_10.left = idx_add.out; + index_ge_10.right = 4'd10; + index_lt_11.left = idx_add.out; + index_lt_11.right = 4'd11; + idx_between_10_11_comb.left = index_ge_10.out; + idx_between_10_11_comb.right = index_lt_11.out; + idx_between_10_11_reg.in = idx_between_10_11_comb.out; + idx_between_10_11_reg.write_en = 1'd1; + } + static<1> group init_idx_between_10_11 { + idx_between_10_11_reg.in = 1'd0; + idx_between_10_11_reg.write_en = 1'd1; + } static<1> group idx_between_5_8_group { index_ge_5.left = idx_add.out; index_ge_5.right = 4'd5; @@ -242,6 +281,20 @@ component main() -> () { idx_between_1_4_reg.in = 1'd0; idx_between_1_4_reg.write_en = 1'd1; } + static<1> group idx_between_8_9_group { + index_ge_8.left = idx_add.out; + index_ge_8.right = 4'd8; + index_lt_9.left = idx_add.out; + index_lt_9.right = 4'd9; + idx_between_8_9_comb.left = index_ge_8.out; + idx_between_8_9_comb.right = index_lt_9.out; + idx_between_8_9_reg.in = idx_between_8_9_comb.out; + idx_between_8_9_reg.write_en = 1'd1; + } + static<1> group init_idx_between_8_9 { + idx_between_8_9_reg.in = 1'd0; + idx_between_8_9_reg.write_en = 1'd1; + } static<1> group idx_between_3_6_group { index_ge_3.left = idx_add.out; index_ge_3.right = 4'd3; @@ -293,15 +346,18 @@ component main() -> () { l0_idx_init; l1_idx_init; init_idx; + init_idx_between_9_10; + init_idx_between_10_11; init_idx_between_5_8; init_idx_between_7_10; init_idx_between_0_3; init_idx_between_1_4; + init_idx_between_8_9; init_idx_between_3_6; init_idx_between_2_5; init_idx_between_6_9; } - static repeat 10 { + static repeat 11 { static par { static par { static par { @@ -329,6 +385,11 @@ component main() -> () { static invoke pe_0_0(top=top_0_0.out, left=left_0_0.out, mul_ready=1'd1)(); } } + static if idx_between_8_9_reg.out { + static par { + pe_0_0_out_write; + } + } } static par { static if idx_between_1_4_reg.out { @@ -352,6 +413,11 @@ component main() -> () { static invoke pe_0_1(top=top_0_1.out, left=left_0_1.out, mul_ready=1'd1)(); } } + static if idx_between_9_10_reg.out { + static par { + pe_0_1_out_write; + } + } } static par { static if idx_between_1_4_reg.out { @@ -375,6 +441,11 @@ component main() -> () { static invoke pe_1_0(top=top_1_0.out, left=left_1_0.out, mul_ready=1'd1)(); } } + static if idx_between_9_10_reg.out { + static par { + pe_1_0_out_write; + } + } } static par { static if idx_between_3_6_reg.out { @@ -387,26 +458,28 @@ component main() -> () { static invoke pe_1_1(top=top_1_1.out, left=left_1_1.out, mul_ready=1'd1)(); } } + static if idx_between_10_11_reg.out { + static par { + pe_1_1_out_write; + } + } } } static par { incr_idx; + idx_between_9_10_group; + idx_between_10_11_group; idx_between_5_8_group; idx_between_7_10_group; idx_between_0_3_group; idx_between_1_4_group; + idx_between_8_9_group; idx_between_3_6_group; idx_between_2_5_group; idx_between_6_9_group; } } } - static seq { - pe_0_0_out_write; - pe_0_1_out_write; - pe_1_0_out_write; - pe_1_1_out_write; - } } } } diff --git a/tests/frontend/systolic/array-3.expect b/tests/frontend/systolic/array-3.expect index 227db90e1b..8f08efe898 100644 --- a/tests/frontend/systolic/array-3.expect +++ b/tests/frontend/systolic/array-3.expect @@ -74,27 +74,46 @@ component main() -> () { @external l2 = std_mem_d1(32, 3, 2); l2_idx = std_reg(2); l2_add = std_add(2); - @external out_mem = std_mem_d1(32, 9, 4); + @external out_mem_0 = std_mem_d1(32, 3, 2); + @external out_mem_1 = std_mem_d1(32, 3, 2); + @external out_mem_2 = std_mem_d1(32, 3, 2); idx = std_reg(4); idx_add = std_add(4); + idx_between_9_10_reg = std_reg(1); + index_lt_10 = std_lt(4); + index_ge_9 = std_ge(4); + idx_between_9_10_comb = std_and(1); + idx_between_10_11_reg = std_reg(1); + index_lt_11 = std_lt(4); + index_ge_10 = std_ge(4); + idx_between_10_11_comb = std_and(1); idx_between_5_8_reg = std_reg(1); index_lt_8 = std_lt(4); index_ge_5 = std_ge(4); idx_between_5_8_comb = std_and(1); idx_between_7_10_reg = std_reg(1); - index_lt_10 = std_lt(4); index_ge_7 = std_ge(4); idx_between_7_10_comb = std_and(1); idx_between_0_3_reg = std_reg(1); index_lt_3 = std_lt(4); idx_between_9_12_reg = std_reg(1); index_lt_12 = std_lt(4); - index_ge_9 = std_ge(4); idx_between_9_12_comb = std_and(1); + idx_between_12_13_reg = std_reg(1); + index_lt_13 = std_lt(4); + index_ge_12 = std_ge(4); + idx_between_12_13_comb = std_and(1); idx_between_1_4_reg = std_reg(1); index_lt_4 = std_lt(4); index_ge_1 = std_ge(4); idx_between_1_4_comb = std_and(1); + idx_between_11_12_reg = std_reg(1); + index_ge_11 = std_ge(4); + idx_between_11_12_comb = std_and(1); + idx_between_8_9_reg = std_reg(1); + index_lt_9 = std_lt(4); + index_ge_8 = std_ge(4); + idx_between_8_9_comb = std_and(1); idx_between_3_6_reg = std_reg(1); index_lt_6 = std_lt(4); index_ge_3 = std_ge(4); @@ -104,7 +123,6 @@ component main() -> () { index_ge_2 = std_ge(4); idx_between_2_5_comb = std_and(1); idx_between_6_9_reg = std_reg(1); - index_lt_9 = std_lt(4); index_ge_6 = std_ge(4); idx_between_6_9_comb = std_and(1); idx_between_4_7_reg = std_reg(1); @@ -112,8 +130,6 @@ component main() -> () { index_ge_4 = std_ge(4); idx_between_4_7_comb = std_and(1); idx_between_8_11_reg = std_reg(1); - index_lt_11 = std_lt(4); - index_ge_8 = std_ge(4); idx_between_8_11_comb = std_and(1); } wires { @@ -216,9 +232,9 @@ component main() -> () { top_1_0.write_en = 1'd1; } static<1> group pe_0_0_out_write { - out_mem.addr0 = 4'd0; - out_mem.write_data = pe_0_0.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 2'd0; + out_mem_0.write_data = pe_0_0.out; + out_mem_0.write_en = 1'd1; } static<1> group pe_0_1_right_move { left_0_2.in = left_0_1.out; @@ -229,18 +245,18 @@ component main() -> () { top_1_1.write_en = 1'd1; } static<1> group pe_0_1_out_write { - out_mem.addr0 = 4'd1; - out_mem.write_data = pe_0_1.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 2'd1; + out_mem_0.write_data = pe_0_1.out; + out_mem_0.write_en = 1'd1; } static<1> group pe_0_2_down_move { top_1_2.in = top_0_2.out; top_1_2.write_en = 1'd1; } static<1> group pe_0_2_out_write { - out_mem.addr0 = 4'd2; - out_mem.write_data = pe_0_2.out; - out_mem.write_en = 1'd1; + out_mem_0.addr0 = 2'd2; + out_mem_0.write_data = pe_0_2.out; + out_mem_0.write_en = 1'd1; } static<1> group pe_1_0_right_move { left_1_1.in = left_1_0.out; @@ -251,9 +267,9 @@ component main() -> () { top_2_0.write_en = 1'd1; } static<1> group pe_1_0_out_write { - out_mem.addr0 = 4'd3; - out_mem.write_data = pe_1_0.out; - out_mem.write_en = 1'd1; + out_mem_1.addr0 = 2'd0; + out_mem_1.write_data = pe_1_0.out; + out_mem_1.write_en = 1'd1; } static<1> group pe_1_1_right_move { left_1_2.in = left_1_1.out; @@ -264,41 +280,41 @@ component main() -> () { top_2_1.write_en = 1'd1; } static<1> group pe_1_1_out_write { - out_mem.addr0 = 4'd4; - out_mem.write_data = pe_1_1.out; - out_mem.write_en = 1'd1; + out_mem_1.addr0 = 2'd1; + out_mem_1.write_data = pe_1_1.out; + out_mem_1.write_en = 1'd1; } static<1> group pe_1_2_down_move { top_2_2.in = top_1_2.out; top_2_2.write_en = 1'd1; } static<1> group pe_1_2_out_write { - out_mem.addr0 = 4'd5; - out_mem.write_data = pe_1_2.out; - out_mem.write_en = 1'd1; + out_mem_1.addr0 = 2'd2; + out_mem_1.write_data = pe_1_2.out; + out_mem_1.write_en = 1'd1; } static<1> group pe_2_0_right_move { left_2_1.in = left_2_0.out; left_2_1.write_en = 1'd1; } static<1> group pe_2_0_out_write { - out_mem.addr0 = 4'd6; - out_mem.write_data = pe_2_0.out; - out_mem.write_en = 1'd1; + out_mem_2.addr0 = 2'd0; + out_mem_2.write_data = pe_2_0.out; + out_mem_2.write_en = 1'd1; } static<1> group pe_2_1_right_move { left_2_2.in = left_2_1.out; left_2_2.write_en = 1'd1; } static<1> group pe_2_1_out_write { - out_mem.addr0 = 4'd7; - out_mem.write_data = pe_2_1.out; - out_mem.write_en = 1'd1; + out_mem_2.addr0 = 2'd1; + out_mem_2.write_data = pe_2_1.out; + out_mem_2.write_en = 1'd1; } static<1> group pe_2_2_out_write { - out_mem.addr0 = 4'd8; - out_mem.write_data = pe_2_2.out; - out_mem.write_en = 1'd1; + out_mem_2.addr0 = 2'd2; + out_mem_2.write_data = pe_2_2.out; + out_mem_2.write_en = 1'd1; } static<1> group init_idx { idx.in = 4'd0; @@ -310,6 +326,34 @@ component main() -> () { idx.in = idx_add.out; idx.write_en = 1'd1; } + static<1> group idx_between_9_10_group { + index_ge_9.left = idx_add.out; + index_ge_9.right = 4'd9; + index_lt_10.left = idx_add.out; + index_lt_10.right = 4'd10; + idx_between_9_10_comb.left = index_ge_9.out; + idx_between_9_10_comb.right = index_lt_10.out; + idx_between_9_10_reg.in = idx_between_9_10_comb.out; + idx_between_9_10_reg.write_en = 1'd1; + } + static<1> group init_idx_between_9_10 { + idx_between_9_10_reg.in = 1'd0; + idx_between_9_10_reg.write_en = 1'd1; + } + static<1> group idx_between_10_11_group { + index_ge_10.left = idx_add.out; + index_ge_10.right = 4'd10; + index_lt_11.left = idx_add.out; + index_lt_11.right = 4'd11; + idx_between_10_11_comb.left = index_ge_10.out; + idx_between_10_11_comb.right = index_lt_11.out; + idx_between_10_11_reg.in = idx_between_10_11_comb.out; + idx_between_10_11_reg.write_en = 1'd1; + } + static<1> group init_idx_between_10_11 { + idx_between_10_11_reg.in = 1'd0; + idx_between_10_11_reg.write_en = 1'd1; + } static<1> group idx_between_5_8_group { index_ge_5.left = idx_add.out; index_ge_5.right = 4'd5; @@ -362,6 +406,20 @@ component main() -> () { idx_between_9_12_reg.in = 1'd0; idx_between_9_12_reg.write_en = 1'd1; } + static<1> group idx_between_12_13_group { + index_ge_12.left = idx_add.out; + index_ge_12.right = 4'd12; + index_lt_13.left = idx_add.out; + index_lt_13.right = 4'd13; + idx_between_12_13_comb.left = index_ge_12.out; + idx_between_12_13_comb.right = index_lt_13.out; + idx_between_12_13_reg.in = idx_between_12_13_comb.out; + idx_between_12_13_reg.write_en = 1'd1; + } + static<1> group init_idx_between_12_13 { + idx_between_12_13_reg.in = 1'd0; + idx_between_12_13_reg.write_en = 1'd1; + } static<1> group idx_between_1_4_group { index_ge_1.left = idx_add.out; index_ge_1.right = 4'd1; @@ -376,6 +434,34 @@ component main() -> () { idx_between_1_4_reg.in = 1'd0; idx_between_1_4_reg.write_en = 1'd1; } + static<1> group idx_between_11_12_group { + index_ge_11.left = idx_add.out; + index_ge_11.right = 4'd11; + index_lt_12.left = idx_add.out; + index_lt_12.right = 4'd12; + idx_between_11_12_comb.left = index_ge_11.out; + idx_between_11_12_comb.right = index_lt_12.out; + idx_between_11_12_reg.in = idx_between_11_12_comb.out; + idx_between_11_12_reg.write_en = 1'd1; + } + static<1> group init_idx_between_11_12 { + idx_between_11_12_reg.in = 1'd0; + idx_between_11_12_reg.write_en = 1'd1; + } + static<1> group idx_between_8_9_group { + index_ge_8.left = idx_add.out; + index_ge_8.right = 4'd8; + index_lt_9.left = idx_add.out; + index_lt_9.right = 4'd9; + idx_between_8_9_comb.left = index_ge_8.out; + idx_between_8_9_comb.right = index_lt_9.out; + idx_between_8_9_reg.in = idx_between_8_9_comb.out; + idx_between_8_9_reg.write_en = 1'd1; + } + static<1> group init_idx_between_8_9 { + idx_between_8_9_reg.in = 1'd0; + idx_between_8_9_reg.write_en = 1'd1; + } static<1> group idx_between_3_6_group { index_ge_3.left = idx_add.out; index_ge_3.right = 4'd3; @@ -457,18 +543,23 @@ component main() -> () { l1_idx_init; l2_idx_init; init_idx; + init_idx_between_9_10; + init_idx_between_10_11; init_idx_between_5_8; init_idx_between_7_10; init_idx_between_0_3; init_idx_between_9_12; + init_idx_between_12_13; init_idx_between_1_4; + init_idx_between_11_12; + init_idx_between_8_9; init_idx_between_3_6; init_idx_between_2_5; init_idx_between_6_9; init_idx_between_4_7; init_idx_between_8_11; } - static repeat 12 { + static repeat 13 { static par { static par { static par { @@ -496,6 +587,11 @@ component main() -> () { static invoke pe_0_0(top=top_0_0.out, left=left_0_0.out, mul_ready=1'd1)(); } } + static if idx_between_8_9_reg.out { + static par { + pe_0_0_out_write; + } + } } static par { static if idx_between_1_4_reg.out { @@ -520,6 +616,11 @@ component main() -> () { static invoke pe_0_1(top=top_0_1.out, left=left_0_1.out, mul_ready=1'd1)(); } } + static if idx_between_9_10_reg.out { + static par { + pe_0_1_out_write; + } + } } static par { static if idx_between_2_5_reg.out { @@ -543,6 +644,11 @@ component main() -> () { static invoke pe_0_2(top=top_0_2.out, left=left_0_2.out, mul_ready=1'd1)(); } } + static if idx_between_10_11_reg.out { + static par { + pe_0_2_out_write; + } + } } static par { static if idx_between_1_4_reg.out { @@ -567,6 +673,11 @@ component main() -> () { static invoke pe_1_0(top=top_1_0.out, left=left_1_0.out, mul_ready=1'd1)(); } } + static if idx_between_9_10_reg.out { + static par { + pe_1_0_out_write; + } + } } static par { static if idx_between_3_6_reg.out { @@ -585,6 +696,11 @@ component main() -> () { static invoke pe_1_1(top=top_1_1.out, left=left_1_1.out, mul_ready=1'd1)(); } } + static if idx_between_10_11_reg.out { + static par { + pe_1_1_out_write; + } + } } static par { static if idx_between_4_7_reg.out { @@ -602,6 +718,11 @@ component main() -> () { static invoke pe_1_2(top=top_1_2.out, left=left_1_2.out, mul_ready=1'd1)(); } } + static if idx_between_11_12_reg.out { + static par { + pe_1_2_out_write; + } + } } static par { static if idx_between_2_5_reg.out { @@ -625,6 +746,11 @@ component main() -> () { static invoke pe_2_0(top=top_2_0.out, left=left_2_0.out, mul_ready=1'd1)(); } } + static if idx_between_10_11_reg.out { + static par { + pe_2_0_out_write; + } + } } static par { static if idx_between_4_7_reg.out { @@ -642,6 +768,11 @@ component main() -> () { static invoke pe_2_1(top=top_2_1.out, left=left_2_1.out, mul_ready=1'd1)(); } } + static if idx_between_11_12_reg.out { + static par { + pe_2_1_out_write; + } + } } static par { static if idx_between_5_8_reg.out { @@ -654,15 +785,25 @@ component main() -> () { static invoke pe_2_2(top=top_2_2.out, left=left_2_2.out, mul_ready=1'd1)(); } } + static if idx_between_12_13_reg.out { + static par { + pe_2_2_out_write; + } + } } } static par { incr_idx; + idx_between_9_10_group; + idx_between_10_11_group; idx_between_5_8_group; idx_between_7_10_group; idx_between_0_3_group; idx_between_9_12_group; + idx_between_12_13_group; idx_between_1_4_group; + idx_between_11_12_group; + idx_between_8_9_group; idx_between_3_6_group; idx_between_2_5_group; idx_between_6_9_group; @@ -671,17 +812,6 @@ component main() -> () { } } } - static seq { - pe_0_0_out_write; - pe_0_1_out_write; - pe_0_2_out_write; - pe_1_0_out_write; - pe_1_1_out_write; - pe_1_2_out_write; - pe_2_0_out_write; - pe_2_1_out_write; - pe_2_2_out_write; - } } } }