Skip to content

Commit

Permalink
Dynamic Implementation of ReLU (#1727)
Browse files Browse the repository at this point in the history
* dynamic relu implementation

* formatting
  • Loading branch information
calebmkim authored Sep 22, 2023
1 parent 590c194 commit f6bd3a0
Show file tree
Hide file tree
Showing 12 changed files with 1,000 additions and 73 deletions.
36 changes: 22 additions & 14 deletions frontends/systolic-lang/gen-systolic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python3
import calyx.builder as cb
from systolic_arg_parser import SystolicConfiguration
from calyx import py_ast
from calyx.utils import bits_needed
from gen_array_component import (
create_systolic_array,
BITWIDTH,
Expand All @@ -10,14 +13,22 @@
default_post_op,
relu_post_op,
leaky_relu_post_op,
relu_dynamic_post_op,
OUT_MEM,
DEFAULT_POST_OP,
RELU_POST_OP,
LEAKY_RELU_POST_OP,
RELU_DYNAMIC_POST_OP,
)
from systolic_arg_parser import SystolicConfiguration, SUPPORTED_POST_OPS
from calyx import py_ast
from calyx.utils import bits_needed

# Dict that maps command line arguments (e.g., "leaky-relu") to component names
# and function that creates them.
POST_OP_DICT = {
None: (DEFAULT_POST_OP, default_post_op),
"leaky-relu": (LEAKY_RELU_POST_OP, leaky_relu_post_op),
"relu": (RELU_POST_OP, relu_post_op),
"relu-dynamic": (RELU_DYNAMIC_POST_OP, relu_dynamic_post_op),
}


def create_mem_connections(
Expand Down Expand Up @@ -90,7 +101,7 @@ def build_main(prog, config: SystolicConfiguration, post_op_component_name):
# Connect outout memories to post_op, and systolic_array_output to
# post_op inputs.
for i in range(left_length):
# connect output memory to post op
# Connect output memory to post op. want to write to this memory.
connections += create_mem_connections(
main, post_op, OUT_MEM + f"_{i}", top_length, read_mem=False
)
Expand Down Expand Up @@ -137,19 +148,16 @@ def build_main(prog, config: SystolicConfiguration, post_op_component_name):
# Building the main component
prog = cb.Builder()
create_systolic_array(prog, systolic_config)
if systolic_config.post_op == "leaky-relu":
leaky_relu_post_op(prog, config=systolic_config)
post_op_component_name = LEAKY_RELU_POST_OP
elif systolic_config.post_op == "relu":
relu_post_op(prog, config=systolic_config)
post_op_component_name = RELU_POST_OP
elif systolic_config.post_op is None:
default_post_op(prog, config=systolic_config)
post_op_component_name = DEFAULT_POST_OP
if systolic_config.post_op in POST_OP_DICT.keys():
post_op_component_name, component_building_func = POST_OP_DICT[
systolic_config.post_op
]
component_building_func(prog, config=systolic_config)
else:
raise ValueError(
f"{systolic_config.post_op} not supported as a post op. \
Supported post ops are {SUPPORTED_POST_OPS}"
Supported post ops are (None means you pass no argument for -p) \
{POST_OP_DICT.keys()}"
)

build_main(
Expand Down
182 changes: 126 additions & 56 deletions frontends/systolic-lang/gen_post_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DEFAULT_POST_OP = "default_post_op"
RELU_POST_OP = "relu_post_op"
LEAKY_RELU_POST_OP = "leaky_relu_post_op"
RELU_DYNAMIC_POST_OP = "relu_dynamic_post_op"
COND_REG = "cond_reg"
WRITE_DONE_COND = "write_done_cond"

Expand Down Expand Up @@ -152,23 +153,31 @@ def relu_post_op(prog: cb.Builder, config: SystolicConfiguration):
imm_write_mem_post_op(prog=prog, config=config, perform_relu=True)


def leaky_relu_comp(prog: cb.Builder, idx_width: int):
def add_dynamic_op_params(comp: cb.ComponentBuilder, idx_width: int):
"""
Adds neccesary parameters for dynamic ops, including:
1) Input value
1) Parameters to write the result of the op to memory.
2) Input index (for the memory to write to)
"""
comp.input("value", BITWIDTH)
comp.input("idx", idx_width)
cb.add_write_mem_params(comp, OUT_MEM, data_width=BITWIDTH, addr_width=idx_width)


def leaky_relu_comp(prog: cb.Builder, idx_width: int) -> cb.ComponentBuilder:
"""
Creates a dynamic, non-pipelined, leaky relu component.
This is the component that actually performs the leaky relu computation on
a given output.
"""
comp = prog.component(name="leaky_relu")
comp.input("value", BITWIDTH)
# Takes a memory and register (i.e., arguments that essentially act as ref cells)
cb.add_write_mem_params(comp, OUT_MEM, data_width=BITWIDTH, addr_width=idx_width)
cb.add_register_params(comp, "idx_reg", idx_width)
comp = prog.component(name="leaky_relu_op")
add_dynamic_op_params(comp, idx_width)

this = comp.this()

fp_mult = comp.fp_sop("fp_mult", "mult_pipe", BITWIDTH, INTWIDTH, FRACWIDTH)
lt = comp.fp_sop("val_lt", "lt", BITWIDTH, INTWIDTH, FRACWIDTH)
incr_idx = comp.add(idx_width, "incr_idx")
write_mem = comp.wire("should_write_mem", 1)

with comp.continuous:
Expand All @@ -188,15 +197,9 @@ def leaky_relu_comp(prog: cb.Builder, idx_width: int):
fp_mult.right = this.value
fp_mult.go = ~(write_mem.out) @ 1

# Increment idx_reg during the cycle that we write to memory.
incr_idx.left = this.idx_reg_out
incr_idx.right = 1
this.idx_reg_in = write_mem.out @ incr_idx.out
this.idx_reg_write_en = write_mem.out @ 1

# Write to memory.
this.out_mem_write_en = write_mem.out @ 1
this.out_mem_addr0 = this.idx_reg_out
this.out_mem_addr0 = this.idx
# Write value if this.value >= 0
# Write mult.out if this.value < 0
this.out_mem_write_data = ~lt.out @ this.value
Expand All @@ -205,32 +208,76 @@ def leaky_relu_comp(prog: cb.Builder, idx_width: int):

comp.control = py_ast.Enable("do_relu")

return comp


def create_leaky_relu_done_condition(comp: cb.ComponentBuilder, num_rows: int):
def relu_dynamic_comp(prog: cb.Builder, idx_width: int):
"""
Creates a dynamic, regular RELU component.
This dynamic implementation is meant to be compared to a static
ReLU implementation in order to show the benefits of static groups and
control.
"""
comp = prog.component(name="relu_dynamic_op")
add_dynamic_op_params(comp, idx_width)

this = comp.this()

lt = comp.fp_sop("val_lt", "lt", BITWIDTH, INTWIDTH, FRACWIDTH)

with comp.continuous:
# gt holds whether this.value > 0
lt.left = this.value
lt.right = 0

with comp.group("do_relu") as g:
# Write to memory.
this.out_mem_write_en = 1
this.out_mem_addr0 = this.idx
# Write value if this.value >= 0
# Write mult.out if this.value < 0
this.out_mem_write_data = ~lt.out @ this.value
this.out_mem_write_data = lt.out @ 0

# It takes one cycle to write to g
g.done = this.out_mem_done

comp.control = py_ast.Enable("do_relu")

return comp


def generate_dynamic_post_op_done(comp: cb.ComponentBuilder, num_rows: int):
"""
The done condition for leaky relu components is triggered once all of the
leaky relu operations have finished.
"""
this = comp.this()
# Check if all relu operations have finished for each row
guard = comp.get_cell("relu_finished_wire_r0").out
guard = comp.get_cell("op_finished_wire_r0").out
for r in range(1, num_rows):
guard = guard & comp.get_cell(f"relu_finished_wire_r{r}").out
all_relu_finished_wire = comp.wire("all_relu_finished_wire", 1)
guard = guard & comp.get_cell(f"op_finished_wire_r{r}").out
all_row_finished_wire = comp.wire("all_row_finished_wire", 1)
with comp.static_group(WRITE_DONE_COND, 1):
all_relu_finished_wire.in_ = guard @ 1
this.computation_done = all_relu_finished_wire.out @ 1
all_row_finished_wire.in_ = guard @ 1
this.computation_done = all_row_finished_wire.out @ 1


def create_leaky_relu_groups(comp: cb.ComponentBuilder, row, num_cols, addr_width):
def create_dynamic_post_op_groups(
comp: cb.ComponentBuilder,
row: int,
num_cols: int,
addr_width: int,
op_component: cb.ComponentBuilder,
):
"""
Creates the groups for the leaky relu post op, i.e., the post-op that
coordinates the execution of the leaky relu component.
"""

def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width):
"""
Helper function that looks at the systolic array output signsl (e.g.,
Helper function that looks at the systolic array output signals (e.g.,
`r0_valid`, `r0_value`, etc.) and creates signals that tells us when: a)
each row is ready for the leaky relu operations to start and b)
the output systolic array values (we need them in registers bc the systolic
Expand All @@ -249,7 +296,7 @@ def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width):
value_ready_signal = valid_signal & (
idx_signal == cb.ExprBuilder(py_ast.ConstantPort(addr_width, col))
)
with comp.static_group(f"r{row}_c{col}_value_group", 1):
with comp.continuous:
# Wire to detect and hold when the row is first valid. Once
# it is valid, we can safely start our relu operations.
row_ready_reg.in_ = valid_signal @ 1
Expand All @@ -265,90 +312,113 @@ def store_output_vals(comp: cb.ComponentBuilder, row, num_cols, addr_width):
reg_value.write_en = val_ready.out @ 1

# Helper function adds assignment wire.in = reg.out == col ? pe_{row}_{col}_out.
def build_assignment(group: cb.GroupBuilder, wire, register, output_val):
group.asgn(
def build_assignment(wire, register, output_val):
comp.continuous.asgn(
wire.port("in"),
output_val.out,
register.port("out") == cb.ExprBuilder(py_ast.ConstantPort(BITWIDTH, col)),
)

group = comp.static_group(f"r{row}_helper", 1)

# Current value we are performing relu on.
cur_val = comp.wire(f"r{row}_cur_val", BITWIDTH)
# Current idx within the row (i.e., column) for the value we are performing relu on.
idx_reg = comp.reg(f"r{row}_cur_idx", addr_width)
# Handling logic to hold the systolic array's output values so they're available
# for moer than one cycle.
# for more than one cycle.
store_output_vals(comp, row, num_cols, addr_width)
for col in range(num_cols):
output_val = comp.get_cell(f"r{row}_c{col}_val_wire")
# Assigning to cur_val wire so that we always have the current value of the
# row based on idx_reg.
build_assignment(group, cur_val, idx_reg, output_val)
build_assignment(cur_val, idx_reg, output_val)

# Instantiate an instance of a leaky_relu component
relu_instance = comp.cell(f"leaky_relu_r{row}", py_ast.CompInst("leaky_relu", []))
op_instance = comp.cell(
f"{op_component.component.name}_r{row}",
py_ast.CompInst(op_component.component.name, []),
)
# Wire that tells us we are finished with relu operation for this row.
relu_finished_wire = comp.wire(f"relu_finished_wire_r{row}", 1)
row_finished_wire = comp.wire(f"op_finished_wire_r{row}", 1)
row_ready_wire = comp.get_cell(f"r{row}_ready_wire")
incr_idx = comp.add(bits_needed(num_cols), f"incr_idx_r{row}")

# Need to pass this component's memory ports another layer down to
# the leaky_relu cell.
this_relu_io_ports = cb.build_connections(
cell1=comp.this(),
cell2=relu_instance,
cell2=op_instance,
root1=OUT_MEM + f"_{row}_",
root2=OUT_MEM + "_",
forward_ports=["addr0", "write_data", "write_en"],
reverse_ports=["done"],
)
# Building connections between relu and idx_reg
relu_idx_io_ports = cb.build_connections(
cell1=idx_reg,
cell2=relu_instance,
root1="",
root2="idx_reg_",
forward_ports=["write_en", "in"],
reverse_ports=["out", "done"],
)
idx_limit_reached = idx_reg.out == cb.ExprBuilder(
py_ast.ConstantPort(BITWIDTH, num_cols)
)
with comp.static_group(f"execute_relu_r{row}", 1) as g:
for i, o in this_relu_io_ports:
g.asgn(i, o)
for i, o in relu_idx_io_ports:
g.asgn(i, o)
# Handle incrementing the idx_reg.
relu_instance.go = (
row_ready_wire.out & (~relu_finished_wire.out)
incr_idx.left = idx_reg.out
incr_idx.right = 1
idx_reg.in_ = incr_idx.out
# Increment idx once the op is done executing
idx_reg.write_en = op_instance.done @ 1

op_instance.go = (
row_ready_wire.out & (~row_finished_wire.out) & (~op_instance.done)
) @ cb.ExprBuilder(py_ast.ConstantPort(1, 1))
# input ports for relu_instance
relu_instance.value = cur_val.out
relu_finished_wire.in_ = idx_limit_reached @ 1
op_instance.value = cur_val.out
op_instance.idx = idx_reg.out
row_finished_wire.in_ = idx_limit_reached @ 1


def leaky_relu_post_op(prog: cb.Builder, config: SystolicConfiguration):
def dynamic_post_op(
prog: cb.Builder,
config: SystolicConfiguration,
post_op_component_name: str,
op_component: cb.ComponentBuilder,
):
"""
Adds a dynamic leaky relu post op to `prog`
Adds a dynamic post op that performs handles the coordination so that
`op_component` (which can be dynamic) gets executed dynamically on each
systolic array output.
"""
num_rows, num_cols = config.get_output_dimensions()
idx_width = bits_needed(num_cols)
# Create a leaky relu component.
leaky_relu_comp(prog, idx_width)
comp = prog.component(name=LEAKY_RELU_POST_OP)
comp = prog.component(name=post_op_component_name)
add_post_op_params(comp, num_rows, idx_width)
for r in range(num_rows):
create_leaky_relu_groups(comp, r, num_cols, idx_width)
create_leaky_relu_done_condition(comp, num_rows)
create_dynamic_post_op_groups(comp, r, num_cols, idx_width, op_component)
generate_dynamic_post_op_done(comp, num_rows)

# all_groups go in one big static par.
all_groups = [py_ast.Enable(WRITE_DONE_COND)]
for r in range(num_rows):
all_groups.append(py_ast.Enable(f"r{r}_helper"))
all_groups.append(py_ast.Enable(f"execute_relu_r{r}"))
for c in range(num_cols):
all_groups.append(py_ast.Enable(f"r{r}_c{c}_value_group"))

comp.control = py_ast.StaticParComp(all_groups)


def leaky_relu_post_op(prog: cb.Builder, config: SystolicConfiguration):
_, num_cols = config.get_output_dimensions()
leaky_relu_op_comp = leaky_relu_comp(prog, idx_width=bits_needed(num_cols))
dynamic_post_op(
prog=prog,
config=config,
post_op_component_name=LEAKY_RELU_POST_OP,
op_component=leaky_relu_op_comp,
)


def relu_dynamic_post_op(prog: cb.Builder, config: SystolicConfiguration):
_, num_cols = config.get_output_dimensions()
relu_dynamic_op_comp = relu_dynamic_comp(prog, idx_width=bits_needed(num_cols))
dynamic_post_op(
prog=prog,
config=config,
post_op_component_name=RELU_DYNAMIC_POST_OP,
op_component=relu_dynamic_op_comp,
)
2 changes: 1 addition & 1 deletion frontends/systolic-lang/systolic_arg_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
import json

SUPPORTED_POST_OPS = ["leaky-relu", "relu"]
SUPPORTED_POST_OPS = ["leaky-relu", "relu", "relu-dynamic"]


class SystolicConfiguration:
Expand Down
1 change: 1 addition & 0 deletions runt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ paths = [
"tests/correctness/systolic/output/*.systolic",
"tests/correctness/systolic/leaky-relu/*.systolic",
"tests/correctness/systolic/relu/*.systolic",
"tests/correctness/systolic/relu-dynamic/*.systolic",
]
cmd = """
fud e --from systolic --to dat \
Expand Down
2 changes: 1 addition & 1 deletion tests/correctness/systolic/leaky-relu/array-2-3-4.expect
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cycles": 18,
"cycles": 22,
"memories": {
"l0": [
"-1.7772064208984375",
Expand Down
Loading

0 comments on commit f6bd3a0

Please sign in to comment.