Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial cgra compiler #704

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler_gym/envs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ py_library(
deps = [
":compiler_env",
"//compiler_gym:config",
"//compiler_gym/envs/cgra",
"//compiler_gym/envs/gcc",
"//compiler_gym/envs/llvm",
"//compiler_gym/envs/loop_tool",
Expand Down
1 change: 1 addition & 0 deletions compiler_gym/envs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cg_add_all_subdirs()
set(ENVS_DEPS
::compiler_env
compiler_gym::envs::gcc::gcc
compiler_gym::envs::cgra::cgra
compiler_gym::envs::loop_tool::loop_tool
)
if(COMPILER_GYM_ENABLE_LLVM_ENV)
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from compiler_gym import config
from compiler_gym.envs.compiler_env import CompilerEnv
from compiler_gym.envs.gcc import GccEnv
from compiler_gym.envs.cgra import CgraEnv

if config.enable_llvm_env:
from compiler_gym.envs.llvm.llvm_env import LlvmEnv # noqa: F401
Expand All @@ -18,6 +19,7 @@
"COMPILER_GYM_ENVS",
"CompilerEnv",
"GccEnv",
"CgraEnv",
"LoopToolEnv",
]

Expand Down
28 changes: 28 additions & 0 deletions compiler_gym/envs/cgra/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
load("@rules_python//python:defs.bzl", "py_library")

# Python client library for the CGRA environment. The compiler service is
# shipped as runtime data next to the sources.
py_library(
    name = "cgra",
    srcs = [
        "DFG.py",
        "Operations.py",
        "__init__.py",
        "cgra_rewards.py",
        "compile_settings.py",
    ],
    data = [
        "//compiler_gym/envs/cgra/service",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "//compiler_gym/envs/cgra/architectures",
        "//compiler_gym/envs/cgra/datasets",
        "//compiler_gym/errors",
        "//compiler_gym/service:client_service_compiler_env",
        "//compiler_gym/service/runtime",  # Implicit dependency of service.
        "//compiler_gym/util",
    ],
)
27 changes: 27 additions & 0 deletions compiler_gym/envs/cgra/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) Facebook, Inc. and its affiliates.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this file needs updating to match the cgra BUILD file

#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

cg_add_all_subdirs()

# Python client library for the CGRA environment.
# Keep SRCS/DATA/DEPS in sync with compiler_gym/envs/cgra/BUILD.
cg_py_library(
  NAME
    cgra
  SRCS
    "__init__.py"
    "cgra_rewards.py"
    "Operations.py"
    "compile_settings.py"
    "DFG.py"
  DATA
    compiler_gym::envs::cgra::service::service
  DEPS
    compiler_gym::service::client_service_compiler_env
    compiler_gym::envs::cgra::datasets::datasets
    # Was misspelled "engs", which names a target that does not exist.
    compiler_gym::envs::cgra::architectures::architectures
    compiler_gym::errors::errors
    compiler_gym::service::runtime::runtime
    compiler_gym::util::util
  PUBLIC
)
227 changes: 227 additions & 0 deletions compiler_gym/envs/cgra/DFG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
ChrisCummins marked this conversation as resolved.
Show resolved Hide resolved
from pathlib import Path
import random

from importlib_metadata import entry_points
from compiler_gym.service.proto import (
Benchmark
)
from typing import Optional, List
from compiler_gym.third_party.inst2vec import Inst2vecEncoder
import compiler_gym.third_party.llvm as llvm
from compiler_gym.envs.cgra.Operations import Operation, operation_from_name

class Edge(object):
    """An edge of the dataflow graph.

    Only the edge type is stored here; the endpoints of each edge are kept
    in the owning graph's adjacency map.
    """

    def __init__(self, type):
        # The parameter shadows the builtin ``type``; the name is kept so
        # that existing keyword callers (``Edge(type=...)``) keep working.
        self.type = type

    def __repr__(self):
        # Readable form for debugging output and for lists of edges.
        return f"Edge(type={self.type!r})"

class Node(object):
    """A node of the dataflow graph: a name plus the operation it performs."""

    def __init__(self, name, operation):
        self.name = name
        self.operation = operation

    def __str__(self):
        # f-string formatting (rather than ``+``) so that a non-string name
        # does not raise TypeError.
        return f"Node with name {self.name} and op {self.operation}"

    def __repr__(self):
        # Containers of nodes are printed with repr(); without this they
        # show up as opaque object addresses.
        return f"Node(name={self.name!r}, operation={self.operation!s})"

class DFG(object):
    """Dataflow graph stored as named nodes plus a successor adjacency map.

    Attributes:
        nodes: dict mapping node name -> node object.
        node_names: list of node names, in insertion order.
        edges: list of edge objects (type only; endpoints live in ``adj``).
        adj: dict mapping node name -> list of successor node names.
        entry_points: list of node names that ``bfs`` starts from.
    """

    def __init__(
        self,
        working_directory: Optional[Path] = None,
        from_json: Optional[Path] = None,
        from_text: Optional[str] = None,
    ):
        """Create a graph, optionally loading it from JSON.

        Args:
            working_directory: Accepted for interface compatibility; not
                used by this class.
            from_json: Path of a JSON file to load. Takes precedence over
                ``from_text`` when both are given.
            from_text: JSON document (as a string) to load.

        When neither source is given the graph starts out empty, so callers
        may construct ``DFG()`` and populate the attributes themselves.
        """
        # Always define every attribute so that an unloaded graph is a valid
        # empty graph instead of failing later with AttributeError.
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = []

        if from_json is not None:
            self.load_dfg_from_json(from_json)
        elif from_text is not None:
            self.load_dfg_from_text(from_text)

    def __str__(self):
        return "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj)

    def load_dfg_from_json(self, path):
        """Load the graph from the JSON file at ``path``."""
        with open(path, 'r') as p:
            # json.loads() needs the document text, not the file object, so
            # read the file before delegating.
            self.load_dfg_from_text(p.read())

    def load_dfg_from_text(self, text):
        """Replace the graph contents with those described by ``text``.

        ``text`` is a JSON document with the keys ``entry_points``,
        ``nodes`` (each with ``name`` and ``operation``) and ``edges``
        (each with ``type``, ``from`` and ``to``).

        Raises:
            KeyError: if a required key is missing, or an edge starts from a
                node that was not declared in ``nodes``.
        """
        spec = json.loads(text)
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = spec['entry_points']

        # Build the nodes first so every edge source has an adjacency list.
        for node in spec['nodes']:
            name = node['name']
            self.nodes[name] = Node(name, operation_from_name(node['operation']))
            self.adj[name] = []
            self.node_names.append(name)

        # Record each edge object and its entry in the adjacency map.
        for edge in spec['edges']:
            self.edges.append(Edge(edge['type']))
            self.adj[edge['from']].append(edge['to'])

    def get_preds(self, node):
        """Return the nodes that have an edge into ``node``.

        The adjacency map is keyed by source node, so this scans every
        entry; use ``build_preds_lookup`` when all predecessors are needed.
        """
        return [
            self.nodes[src]
            for src, succs in self.adj.items()
            if node.name in succs
        ]

    def get_succs(self, node):
        """Return the successor nodes of ``node``, one per outgoing edge."""
        return [self.nodes[dst] for dst in self.adj[node.name]]

    def build_preds_lookup(self):
        """Return a dict mapping every node name to its predecessor nodes.

        Built in a single pass over the adjacency map rather than one full
        scan per node.
        """
        preds_lookup = {name: [] for name in self.node_names}
        for src, succs in self.adj.items():
            # dict.fromkeys() drops parallel edges while keeping order, so a
            # predecessor is listed once (as get_preds does).
            for dst in dict.fromkeys(succs):
                if dst in preds_lookup:
                    preds_lookup[dst].append(self.nodes[src])
        return preds_lookup

    # TODO(jcw) -- fix this, because for a graph with multiple entry nodes,
    # this doesn't actually give the right answer :)
    # (should do in most cases)
    def bfs(self):
        """Yield the nodes batch by batch, starting from the entry points.

        After a batch has been yielded, the next batch consists of every
        not-yet-visited node (in ``node_names`` order) whose predecessors
        have all been visited.

        Raises:
            ValueError: if unvisited nodes remain but none of them has all
                of its predecessors visited (the graph is cyclical).
        """
        preds_lookup = self.build_preds_lookup()
        seen = set()
        batch = list(self.entry_points)

        while batch:
            for name in batch:
                # Skip names listed more than once.
                if name in seen:
                    continue
                seen.add(name)
                yield self.nodes[name]

            if len(seen) == len(self.node_names):
                return

            # Next batch: unvisited nodes whose predecessors are all done.
            batch = [
                name
                for name in self.node_names
                if name not in seen
                and all(p.name in seen for p in preds_lookup[name])
            ]
            if not batch:
                # TODO(jcw) -- Fix this, as support for cyclical DFGs
                # is important to be able to support loops with
                # cross-loop dependencies.
                raise ValueError(
                    "Cyclical DFG --- Impossible to do a true BFS. DFG is "
                    + str(self)
                )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: don't assert False, raise an exception. Or, if that's not possible, sys.exit(1).


# Generate a test DFG using the operations in
# 'operations'.
def generate_DFG(operations: List[Operation], size, seed=0):
    """Generate a random acyclic test DFG built from ``operations``.

    Args:
        operations: Candidate operations. At least one must take zero
            inputs, because the graph is seeded with zero-input nodes.
        size: Number of nodes to generate; must be at least 1.
        seed: Seed for the random generator. The same seed produces the
            same graph.

    Returns:
        A DFG whose nodes are named ``node0``, ``node1``, ... in creation
        order. Every node only takes inputs from nodes created before it.

    Raises:
        ValueError: if ``size`` is smaller than 1 or no operation in
            ``operations`` takes zero inputs.
    """
    if size < 1:
        raise ValueError("size must be at least 1, got " + str(size))

    # Jump-start this --- in reality, these can be
    # phi nodes coming from previous tiers of the loop,
    # or variables coming from outside the loop.
    start_options = [op for op in operations if op.inputs == 0]
    if not start_options:
        raise ValueError(
            "generate_DFG needs at least one operation with zero inputs"
        )

    # A private generator produces the same sequence as seeding the module
    # would, without clobbering the process-wide random state.
    rng = random.Random(seed)

    # Start with some 0-input ops:
    start_ops = rng.randint(1, min(size, 3))

    entry_points = []
    nodes = {}
    node_names = []
    nodes_list = []
    edges = []
    adj = {}

    # Keep track of variables that we should probably use somewhere.
    # NOTE(review): nodes whose operation has zero outputs (e.g. "noop") are
    # also queued here and may be picked as inputs --- confirm that is
    # intended.
    unused_outputs = []

    def add_node(name, operation):
        # Register a freshly created node in every bookkeeping structure.
        node = Node(name, operation)
        node_names.append(name)
        nodes[name] = node
        nodes_list.append(node)
        unused_outputs.append(node)
        adj[name] = []

    for _ in range(start_ops):
        name = "node" + str(len(node_names))
        add_node(name, rng.choice(start_options))
        entry_points.append(name)

    while len(nodes) < size:
        # Generate a new node.
        operation = rng.choice(operations)
        name = "node" + str(len(node_names))

        # Get inputs for this:
        inputs = []
        while len(inputs) < operation.inputs:
            # Select random nodes: biased towards the unused ones. The
            # randint() call comes first so the draw always happens.
            if rng.randint(0, 10) > 6 and unused_outputs:
                inputs.append(unused_outputs.pop(0))
            else:
                inputs.append(rng.choice(nodes_list))

        # If the node has no arguments, then we should add it
        # as an entry point. --- todo(jcw) --- should we just skip
        # this avoid creating graphs with too many constant loads?
        if operation.inputs == 0:
            entry_points.append(name)

        # Now create the edges. The Edge object carries only its type; the
        # endpoints are recorded in the adjacency map.
        for inp in inputs:
            edges.append(Edge('data'))
            adj[inp.name].append(name)

        # Registered only after its inputs were chosen, so a node can never
        # feed itself.
        add_node(name, operation)

    res = DFG()
    res.adj = adj
    res.nodes = nodes
    res.entry_points = entry_points
    res.edges = edges
    res.node_names = node_names

    return res
58 changes: 58 additions & 0 deletions compiler_gym/envs/cgra/Operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

ChrisCummins marked this conversation as resolved.
Show resolved Hide resolved
class Operation(object):
    """Description of one operation kind.

    Attributes:
        name: Name of the operation (e.g. "add").
        inputs: Number of inputs the operation takes.
        outputs: Number of outputs the operation produces.
        latency: Latency of the operation, in cycles.
    """

    def __init__(self, name, inputs, outputs, latency):
        self.name = name
        self.inputs = inputs
        self.outputs = outputs
        self.latency = latency

    def __str__(self):
        return self.name

    def __repr__(self):
        # Readable form for debugging output and for lists of operations.
        return (
            f"Operation(name={self.name!r}, inputs={self.inputs!r}, "
            f"outputs={self.outputs!r}, latency={self.latency!r})"
        )

# Table of the supported operations, in lookup order.
# TODO(jcw) --- should we support more operations as heterogeneous?
# IMO most of the other things that are scheduled are
# pretty vacuous, although we could explore supporting those.
Operations = [
    Operation(name, inputs, outputs, cycles)
    for name, inputs, outputs, cycles in (
        # name, inputs, outputs, cycles
        ("add", 2, 1, 1),
        ("mul", 2, 1, 1),
        ("sub", 2, 1, 1),
        ("div", 2, 1, 1),
        ("and", 2, 1, 1),
        ("or", 2, 1, 1),
        ("xor", 2, 1, 1),
        ("fmul", 2, 1, 1),
        ("fsub", 2, 1, 1),
        ("fadd", 2, 1, 1),
        ("fdiv", 2, 1, 1),
        ("rsh", 2, 1, 1),
        ("lsh", 2, 1, 1),
        ("load", 1, 1, 1),
        ("store", 1, 1, 1),
        ("const", 0, 1, 1),
        ("noop", 0, 0, 1),
    )
]

def operation_index_of(op):
    """Return the index in ``Operations`` of the entry matching ``op``.

    Args:
        op: Either an object with a ``name`` attribute or the operation
            name itself.

    Returns:
        The index of the first entry with that name, or -1 if none matches.
    """
    # Accept a bare name as well as an operation object.
    target = getattr(op, "name", op)
    for index, candidate in enumerate(Operations):
        if candidate.name == target:
            return index
    return -1

def operation_latency(op):
    """Return the latency stored on ``op`` (its ``latency`` attribute)."""
    # TODO(jcw) --- model latency --- or at least expose this
    # to a configuration.
    latency = op.latency
    return latency


def operation_from_name(n):
    """Return the entry of ``Operations`` whose name is ``n``.

    Args:
        n: The operation name. An object with a ``name`` attribute is also
            accepted and is looked up by that name.

    Raises:
        ValueError: if no entry has that name. Indexing the table with a
            "not found" result of -1 would silently return the last entry.
    """
    target = getattr(n, "name", n)
    for candidate in Operations:
        if candidate.name == target:
            return candidate
    known = ", ".join(str(candidate) for candidate in Operations)
    raise ValueError(f"Unknown operation {target!r}; known operations: {known}")
Loading