
Introduce support for generic elementwise binary operations #1040

Draft: wants to merge 24 commits into base: dev

Commits (24)
9bb8313 Introduce support for generic elementwise binary operations (iksnagreb, Apr 12, 2024)
f61a538 [Streamline] Fix FoldQuantWeights input order and shape annotations (iksnagreb, Nov 13, 2023)
8691d3f Make quantized activation handlers data layout aware (iksnagreb, Nov 20, 2023)
24acc18 [Streamline] Fix AbsorbAddIntoMultiThreshold assumed input order (iksnagreb, Nov 13, 2023)
e632328 Fix clipping range issue in RoundAndClipThresholds transformation (iksnagreb, Mar 13, 2024)
8dd85f4 Rework RoundAndClipThresholds to avoid range and type promotion issues (iksnagreb, Apr 6, 2024)
8b7c2eb [Thresholding] Make sure the output of python simulation is float32 (iksnagreb, Apr 17, 2024)
f01d02f [Tests] Rework test-cases for reworked RoundAndClipThresholds (iksnagreb, Apr 6, 2024)
023d950 [Streamline] Check validity of broadcasting Add into MultiThreshold (iksnagreb, Apr 17, 2024)
945db12 [Streamline] Fix backwards-propagating shapes in MoveAddPastMul (iksnagreb, Apr 17, 2024)
3f13673 [Elementwise] Add InferElementwiseBinaryOperation transformation (iksnagreb, Apr 18, 2024)
6a6616a [Tests] Add simple integration test for ElementwiseBinaryOperation (iksnagreb, Apr 18, 2024)
fd1aedd [Elementwise] Some cleanup / simplification of generated code (iksnagreb, Apr 19, 2024)
f010d18 [Streamline] Fix shape propagation of MoveLinearPastEltwiseAdd (iksnagreb, Apr 19, 2024)
7aaf739 [Tests] Add missing streamlining for testing ElementwiseBinaryOperation (iksnagreb, Apr 19, 2024)
5268ffe [Elementwise] Implement bit-width minimization for all specializations (iksnagreb, Apr 19, 2024)
4769d8e [Elementwise] Add support for floating-point operations (iksnagreb, Apr 19, 2024)
87fc002 [Elementwise] Implement get_exp_cycles for ElementwiseBinaryOperation (iksnagreb, May 3, 2024)
efb1cc9 [Elementwise] Add support for ElementwiseBinaryOperation to SetFolding (iksnagreb, May 3, 2024)
f34dcfc [Elementwise] Remove FIFO depths attribute overloads (iksnagreb, May 10, 2024)
e361cb9 [Elementwise] Add ARRAY_PARTITION and BIND_STORAGE directives (iksnagreb, May 17, 2024)
653673b [Streamline] Prevent FactorOutMulSignMagnitude from handling join-nodes (iksnagreb, Aug 8, 2024)
de97911 [Streamline] Delete initializer datatype annotation after MoveAddPastMul (iksnagreb, Aug 8, 2024)
dd68078 [Elementwise] Reintroduce FIFO depths attribute overloads (iksnagreb, Aug 28, 2024)
3 changes: 3 additions & 0 deletions .isort.cfg
@@ -9,3 +9,6 @@ sections=FUTURE,STDLIB,TEST,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
default_section=THIRDPARTY
multi_line_output=3
profile=black
ignore_comments=true
ignore_whitespace=true
honor_noqa=true
47 changes: 47 additions & 0 deletions custom_hls/flatten.hpp
@@ -0,0 +1,47 @@
#ifndef FLATTEN_HPP
#define FLATTEN_HPP

// HLS arbitrary precision types
#include <ap_int.h>

// Flattens an array of N elements of Type into a single bitvector
template<long unsigned N, class Type>
ap_uint<N * Type::width> flatten(const Type buffer[N]) {
    // Inline this small piece of bit merging logic
#pragma HLS INLINE
    // Fill a flat word of N times the bit-width of the element type
    ap_uint<N * Type::width> flat;
    // Merge all N chunks of the tile into the flat bitvector
    for(unsigned j = 0; j < N; ++j) {
        // Do the merging of all chunks in parallel
#pragma HLS UNROLL
        // Insert the chunk into the right place of the bitvector
        flat((j + 1) * Type::width - 1, j * Type::width) = buffer[j];
    }
    // Return the buffer flattened into a single bitvector
    return flat;
}

// Flattens an array of N elements of float into a single bitvector
template<long unsigned N>
ap_uint<N * 32> flatten(const float buffer[N]) {
    // Inline this small piece of bit merging logic
#pragma HLS INLINE
    // Fill a flat word of N times the bit-width of the element type
    ap_uint<N * 32> flat;
    // Merge all N chunks of the tile into the flat bitvector
    for(unsigned j = 0; j < N; ++j) {
        // Do the merging of all chunks in parallel
#pragma HLS UNROLL
        // Insert the chunk into the right place of the bitvector
        flat((j + 1) * 32 - 1, j * 32) =
            // Note: Reinterpret the float as a 32-bit unsigned bit-vector
            *reinterpret_cast<const ap_uint<32>*>(&buffer[j]);
    }
    // Return the buffer flattened into a single bitvector
    return flat;
}

#endif // FLATTEN_HPP
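
For context, a minimal usage sketch of this helper, with an illustrative element type and tile size that are not taken from this PR (HLS C++, assuming the Vivado/Vitis HLS ap_int types):

// Hypothetical example: pack a tile of four ap_int<8> elements into one
// 32-bit stream word. N must be given explicitly, since the array parameter
// decays to a pointer, while Type is deduced from the argument.
#include <ap_int.h>
#include "flatten.hpp"

ap_uint<32> pack_tile(const ap_int<8> tile[4]) {
#pragma HLS INLINE
    // ap_uint<4 * ap_int<8>::width> is ap_uint<32>
    return flatten<4>(tile);
}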
29 changes: 27 additions & 2 deletions src/finn/custom_op/fpgadataflow/__init__.py
@@ -27,6 +27,33 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The base class of all generic custom operations before specializing to either
# HLS or RTL backend
from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp

# Dictionary of HWCustomOp implementations
custom_op = dict()


# Registers a class into the custom_op dictionary
# Note: This must be defined first, before importing any custom op
# implementation to avoid "importing partially initialized module" issues.
def register_custom_op(cls):
    # The class must actually implement HWCustomOp
    assert issubclass(cls, HWCustomOp), f"{cls} must subclass {HWCustomOp}"
    # Insert the class into the custom_op dictionary by its name
    custom_op[cls.__name__] = cls  # noqa: Some weird type annotation issue?
    # Pass through the class unmodified
    return cls


# flake8: noqa
# Disable linting from here, as all imports will be flagged E402 and maybe F401


# Import the submodule containing specializations of ElementwiseBinaryOperation
# Note: This will automatically register all decorated classes into this domain
import finn.custom_op.fpgadataflow.elementwise_binary
from finn.custom_op.fpgadataflow.addstreams import AddStreams
from finn.custom_op.fpgadataflow.channelwise_op import ChannelwiseOp
from finn.custom_op.fpgadataflow.concat import StreamingConcat
@@ -55,8 +82,6 @@
from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour
from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU

custom_op = dict()

# make sure new HLSCustomOp subclasses are imported here so that they get
# registered and plug in correctly into the infrastructure
custom_op["MVAU"] = MVAU