facebookresearch · j-c-w · May 11, 2022 · Jun 19, 2022 · ChrisCummins · Jun 10, 2022
diff --git a/compiler_gym/envs/BUILD b/compiler_gym/envs/BUILD
@@ -11,6 +11,7 @@ py_library(
     deps = [
         ":compiler_env",
         "//compiler_gym:config",
+        "//compiler_gym/envs/cgra",
         "//compiler_gym/envs/gcc",
         "//compiler_gym/envs/llvm",
         "//compiler_gym/envs/loop_tool",

diff --git a/compiler_gym/envs/CMakeLists.txt b/compiler_gym/envs/CMakeLists.txt
@@ -8,6 +8,7 @@ cg_add_all_subdirs()
 set(ENVS_DEPS
     ::compiler_env
+    compiler_gym::envs::cgra::cgra
     compiler_gym::envs::gcc::gcc
     compiler_gym::envs::loop_tool::loop_tool
 )
 if(COMPILER_GYM_ENABLE_LLVM_ENV)

diff --git a/compiler_gym/envs/__init__.py b/compiler_gym/envs/__init__.py
@@ -5,6 +5,7 @@
 from compiler_gym import config
 from compiler_gym.envs.compiler_env import CompilerEnv
 from compiler_gym.envs.gcc import GccEnv
+from compiler_gym.envs.cgra import CgraEnv
 
 if config.enable_llvm_env:
     from compiler_gym.envs.llvm.llvm_env import LlvmEnv  # noqa: F401
@@ -18,6 +19,7 @@
     "COMPILER_GYM_ENVS",
     "CompilerEnv",
     "GccEnv",
+    "CgraEnv",
     "LoopToolEnv",
 ]
 

diff --git a/compiler_gym/envs/cgra/BUILD b/compiler_gym/envs/cgra/BUILD
@@ -0,0 +1,28 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+load("@rules_python//python:defs.bzl", "py_library")
+
+py_library(
+    name = "cgra",
+    srcs = [
+        "DFG.py",
+        "Operations.py",
+        "__init__.py",
+        "cgra_rewards.py",
+        "compile_settings.py",
+    ],
+    data = [
+        "//compiler_gym/envs/cgra/service",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//compiler_gym/envs/cgra/architectures",
+        "//compiler_gym/envs/cgra/datasets",
+        "//compiler_gym/errors",
+        "//compiler_gym/service:client_service_compiler_env",
+        "//compiler_gym/service/runtime",  # Implicit dependency of service.
+        "//compiler_gym/util",
+    ],
+)
diff --git a/compiler_gym/envs/cgra/CMakeLists.txt b/compiler_gym/envs/cgra/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+cg_add_all_subdirs()
+
+cg_py_library(
+  NAME
+    cgra
+  SRCS
+    "__init__.py"
+    "cgra_rewards.py"
+    "Operations.py"
+    "compile_settings.py"
+    "DFG.py"
+  DATA
+    compiler_gym::envs::cgra::service::service
+  DEPS
+    compiler_gym::service::client_service_compiler_env
+    compiler_gym::envs::cgra::datasets::datasets
+    compiler_gym::envs::cgra::architectures::architectures
+    compiler_gym::errors::errors
+    compiler_gym::service::runtime::runtime
+    compiler_gym::util::util
+  PUBLIC
+)
diff --git a/compiler_gym/envs/cgra/DFG.py b/compiler_gym/envs/cgra/DFG.py
@@ -0,0 +1,227 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import logging
+import random
+from collections import deque
+from pathlib import Path
+from typing import List, Optional
+
+from importlib_metadata import entry_points
+
+import compiler_gym.third_party.llvm as llvm
+from compiler_gym.envs.cgra.Operations import Operation, operation_from_name
+from compiler_gym.service.proto import Benchmark
+from compiler_gym.third_party.inst2vec import Inst2vecEncoder
+
+class Edge:
+    """An edge of a data-flow graph, tagged with its kind.
+
+    Only the kind is recorded; the endpoints of the edge are not stored here.
+    """
+
+    def __init__(self, type):
+        # The parameter name shadows the builtin `type`; it is kept because
+        # callers may pass it by keyword.
+        self.type = type
+
+class Node:
+    """A data-flow-graph node: a name plus the operation it performs."""
+
+    def __init__(self, name, operation):
+        """Store the node's `name` (a string) and its `operation`."""
+        self.name = name
+        self.operation = operation
+
+    def __str__(self):
+        return "Node with name " + self.name + " and op " + str(self.operation)
+
+    def __repr__(self):
+        # Containers format their items with repr(); without this, printing a
+        # dict or list of nodes only shows opaque object addresses.
+        return f"Node(name={self.name!r}, operation={str(self.operation)!r})"
+
+class DFG:
+    """A data-flow graph: named nodes plus a successor adjacency list.
+
+    Attributes (populated by the loaders, or assigned directly by
+    ``generate_DFG``):
+        nodes: dict mapping node name -> Node.
+        node_names: list of node names in insertion order.
+        edges: list of Edge objects (edge kinds only; no endpoints).
+        adj: dict mapping node name -> list of successor node names.
+        entry_points: list of node names that ``bfs`` visits first.
+    """
+
+    def __init__(
+        self,
+        working_directory: Optional[Path] = None,
+        from_json: Optional[Path] = None,
+        from_text: Optional[str] = None,
+    ):
+        """Create a graph, optionally loading it from JSON.
+
+        Args:
+            working_directory: Accepted for interface compatibility; unused.
+            from_json: Path of a JSON file to load. Takes precedence over
+                `from_text`.
+            from_text: JSON document (as a string) to load.
+        """
+        # Start from an empty but fully initialised graph so that str() and
+        # the traversal helpers work even when nothing is loaded.
+        self.nodes = {}
+        self.node_names = []
+        self.edges = []
+        self.adj = {}
+        self.entry_points = []
+
+        if from_json is not None:
+            self.load_dfg_from_json(from_json)
+        elif from_text is not None:
+            self.load_dfg_from_text(from_text)
+
+    def __str__(self):
+        return "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj)
+
+    def load_dfg_from_json(self, path):
+        """Load the graph from the JSON file at `path`."""
+        with open(path, "r") as handle:
+            # json.loads() needs the document text, not the file object.
+            self.load_dfg_from_text(handle.read())
+
+    def load_dfg_from_text(self, text):
+        """Replace the graph's contents with the DFG described by JSON `text`.
+
+        The document must provide "entry_points", "nodes" (objects with
+        "name" and "operation") and "edges" (objects with "type", "from" and
+        "to").
+        """
+        spec = json.loads(text)
+        self.nodes = {}
+        self.node_names = []
+        self.edges = []
+        self.adj = {}
+        self.entry_points = spec['entry_points']
+
+        # Build the nodes first so that every edge endpoint already has an
+        # adjacency list.
+        for node in spec['nodes']:
+            name = node['name']
+            self.nodes[name] = Node(name, operation_from_name(node['operation']))
+            self.adj[name] = []
+            self.node_names.append(name)
+
+        for edge in spec['edges']:
+            self.edges.append(Edge(edge['type']))
+            self.adj[edge['from']].append(edge['to'])
+
+    def get_preds(self, node):
+        """Return the nodes that have an edge into `node`.
+
+        The adjacency list is keyed by source node, so this scans every
+        list; use `build_preds_lookup` when predecessors of many nodes are
+        needed.
+        """
+        return [
+            self.nodes[source]
+            for source, targets in self.adj.items()
+            if node.name in targets
+        ]
+
+    def get_succs(self, node):
+        """Return the nodes that `node` has an edge to."""
+        return [self.nodes[target] for target in self.adj[node.name]]
+
+    def build_preds_lookup(self):
+        """Return a dict mapping every node name to its predecessor nodes.
+
+        Built in a single pass over the adjacency list. Each predecessor is
+        listed once, even when several parallel edges exist.
+        """
+        preds_lookup = {name: [] for name in self.node_names}
+        for source, targets in self.adj.items():
+            # dict.fromkeys() de-duplicates parallel edges, keeping order.
+            for target in dict.fromkeys(targets):
+                if target in preds_lookup:
+                    preds_lookup[target].append(self.nodes[source])
+        return preds_lookup
+
+    # TODO(jcw) -- fix this, because for a graph with multiple entry nodes,
+    # this doesn't actually give the right answer :)
+    # (should do in most cases)
+    def bfs(self):
+        """Yield the nodes in dependency order.
+
+        The entry points are yielded first, in their listed order. After
+        that, each time the work queue runs dry, every unvisited node whose
+        predecessors have all been visited is queued (in `node_names`
+        order). Each node is yielded at most once.
+
+        Raises:
+            AssertionError: If unvisited nodes remain but none of them has
+                all of its predecessors visited (a cyclical DFG).
+        """
+        logging.getLogger(__name__).debug(
+            "Doing BFS, entry points are %s", self.entry_points
+        )
+        preds_lookup = self.build_preds_lookup()
+        seen = set()
+        to_explore = deque(self.entry_points)
+
+        while True:
+            if not to_explore:
+                pending = [name for name in self.node_names if name not in seen]
+                if not pending:
+                    return
+                # Queue the next batch: nodes whose predecessors are all done.
+                to_explore.extend(
+                    name
+                    for name in pending
+                    if all(pred.name in seen for pred in preds_lookup[name])
+                )
+                if not to_explore:
+                    # TODO(jcw) -- Fix this, as support for cyclical DFGs
+                    # is important to be able to support loops with
+                    # cross-loop dependencies.
+                    # Raised explicitly (rather than `assert False`) so the
+                    # check survives `python -O`.
+                    raise AssertionError(
+                        "Cyclical DFG --- Impossible to do a true BFS. "
+                        "DFG is " + str(self)
+                    )
+
+            head = to_explore.popleft()
+            if head in seen:
+                continue
+            seen.add(head)
+            yield self.nodes[head]
+
+def generate_DFG(operations: List[Operation], size, seed=0):
+    """Generate a pseudo-random test DFG using the operations in `operations`.
+
+    Between one and three (but at most `size`) zero-input nodes are created
+    first and registered as entry points. Further nodes are then added until
+    the graph has `size` nodes, each taking its inputs from previously
+    created nodes. The same `operations`, `size` and `seed` always produce
+    the same graph.
+
+    Args:
+        operations: Candidate operations; at least one must have
+            ``inputs == 0`` so that the starting nodes can be created.
+        size: Number of nodes to generate; must be at least 1.
+        seed: Seed for the generator's private random number source.
+
+    Returns:
+        A DFG whose nodes are named "node0", "node1", ...
+
+    Raises:
+        ValueError: If `size` is less than 1, or if `operations` contains no
+            zero-input operation.
+    """
+    if size < 1:
+        raise ValueError(f"size must be at least 1, got {size}")
+
+    log = logging.getLogger(__name__)
+
+    # A private generator gives the same sequence as seeding the module-level
+    # one, without clobbering random state shared with the rest of the
+    # process.
+    rng = random.Random(seed)
+
+    # Start with some 0-input ops:
+    start_ops = rng.randint(1, min(size, 3))
+
+    # Jump-start this --- in reality, these can be
+    # phi nodes coming from previous tiers of the loop,
+    # or variables coming from outside the loop.
+    start_options = [op for op in operations if op.inputs == 0]
+    if not start_options:
+        raise ValueError(
+            "operations must contain at least one operation with 0 inputs"
+        )
+    log.debug("Generating DFG with %d starting nodes", start_ops)
+
+    entries = []
+    nodes = {}
+    node_names = []
+    nodes_list = []
+    edges = []
+    adj = {}
+
+    # Keep track of variables that we should probably use somewhere.
+    unused_outputs = deque()
+
+    for _ in range(start_ops):
+        name = "node" + str(len(node_names))
+        node_names.append(name)
+        start_node = Node(name, rng.choice(start_options))
+
+        nodes[name] = start_node
+        nodes_list.append(start_node)
+        entries.append(name)
+        unused_outputs.append(start_node)
+        adj[name] = []
+
+    while len(nodes) < size:
+        # Generate a new node.
+        operation = rng.choice(operations)
+        name = "node" + str(len(node_names))
+        node_names.append(name)
+
+        # Get inputs for this:
+        inputs = []
+        while len(inputs) < operation.inputs:
+            # Select random nodes: biased towards the unused ones. The random
+            # draw happens before the emptiness check so that the sequence of
+            # draws does not depend on the queue's state.
+            if rng.randint(0, 10) > 6 and unused_outputs:
+                inputs.append(unused_outputs.popleft())
+            else:
+                inputs.append(rng.choice(nodes_list))
+
+        # If the node has no arguments, then we should add it
+        # as an entry point.  --- todo(jcw) --- should we just skip
+        # this avoid creating graphs with too many constant loads?
+        if operation.inputs == 0:
+            entries.append(name)
+
+        # Now create the edges. Edge objects carry only their kind; the
+        # endpoints are recorded in the adjacency list.
+        for source in inputs:
+            edges.append(Edge('data'))
+            adj[source.name].append(name)
+
+        this_node = Node(name, operation)
+        nodes[name] = this_node
+        nodes_list.append(this_node)
+        unused_outputs.append(this_node)
+        adj[name] = []
+
+    res = DFG()
+    res.adj = adj
+    res.nodes = nodes
+    res.entry_points = entries
+    res.edges = edges
+    res.node_names = node_names
+    log.debug("Generated DFG nodes: %s", res.nodes)
+
+    return res
diff --git a/compiler_gym/envs/cgra/Operations.py b/compiler_gym/envs/cgra/Operations.py
@@ -0,0 +1,58 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+class Operation:
+    """Description of one operation kind that a DFG node can perform."""
+
+    def __init__(self, name, inputs, outputs, latency):
+        """Record the operation's name, input/output counts and latency."""
+        self.name = name
+        self.inputs = inputs
+        self.outputs = outputs
+        self.latency = latency
+
+    def __str__(self):
+        # An operation is displayed simply by its name.
+        return self.name
+
+# Table of the supported operations, in index order.
+# TODO(jcw) --- should we support more operations as heterogeneous?
+# IMO most of the other things that are scheduled are
+# pretty vacuous, although we could explore supporting those.
+Operations = [
+    Operation(op_name, op_inputs, op_outputs, op_cycles)
+    for op_name, op_inputs, op_outputs, op_cycles in (
+        # (name, inputs, outputs, cycles)
+        ("add", 2, 1, 1),
+        ("mul", 2, 1, 1),
+        ("sub", 2, 1, 1),
+        ("div", 2, 1, 1),
+        ("and", 2, 1, 1),
+        ("or", 2, 1, 1),
+        ("xor", 2, 1, 1),
+        ("fmul", 2, 1, 1),
+        ("fsub", 2, 1, 1),
+        ("fadd", 2, 1, 1),
+        ("fdiv", 2, 1, 1),
+        ("rsh", 2, 1, 1),
+        ("lsh", 2, 1, 1),
+        ("load", 1, 1, 1),
+        ("store", 1, 1, 1),
+        ("const", 0, 1, 1),
+        ("noop", 0, 0, 1),
+    )
+]
+
+def operation_index_of(op):
+    """Return the index in `Operations` of the entry named like `op`.
+
+    Args:
+        op: An Operation (matched by its `name`) or a bare operation name.
+
+    Returns:
+        The index of the first matching entry, or -1 if there is none.
+    """
+    # Accept both an object with a `.name` and the name string itself.
+    wanted = getattr(op, "name", op)
+    for index, candidate in enumerate(Operations):
+        if candidate.name == wanted:
+            return index
+    return -1
+
+def operation_latency(op):
+    """Return the latency recorded on the operation `op`."""
+    # TODO(jcw) --- model latency --- or at least expose this
+    # to a configuration.
+    return op.latency
+
+
+def operation_from_name(n):
+    """Return the entry of `Operations` whose name is `n`.
+
+    Args:
+        n: Operation name, e.g. "add".
+
+    Raises:
+        ValueError: If no operation with that name exists.
+    """
+    # Match on the name directly: `n` is a string, not an Operation, and an
+    # unknown name must not silently fall through to some other entry.
+    for candidate in Operations:
+        if candidate.name == n:
+            return candidate
+    raise ValueError(f"Unknown CGRA operation name: {n!r}")