OPEN: FP integration #3

Status: Closed (wanted to merge 7 commits)
Deeploy/AbstractDataTypes.py (134 additions, 0 deletions)

@@ -26,6 +26,7 @@
from __future__ import annotations

import copy
import math
from abc import abstractmethod
from dataclasses import dataclass
from typing import Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union
@@ -234,6 +235,139 @@ def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkContext] = None):
return True


class FloatImmediate(Immediate[Union[float, Iterable[float]], _ImmediateType]):
typeFraction: int #: int: Represents the number of bits reserved for the fraction part
typeExponent: int #: int: Represents the number of bits reserved for the exponent part
signed: bool #: bool: Represents whether the underlying float is signed or unsigned (should be removed)
Review comment (Collaborator): If this is not needed, please remove it :)

Reply (Author): Sure, float numbers are all signed after all; I just put the bool here to make sure it wouldn't conflict with anything else in the framework.


@_classproperty
def typeExponentMax(cls) -> int:
# In floating point, an all-ones exponent is reserved for special numbers (i.e. NaN or Inf)
return 2**(cls.typeExponent) - 2

@_classproperty
def typeExponentOffset(cls) -> int:
# The offset added to the exponent
return 2**(cls.typeExponent - 1) - 1

# ADEQUINO: This is an ugly workaround for FP; it works for bfloat16 and fp32 because bfloat16 is a truncated fp32
Review comment (Collaborator): Not sure I understand this comment. What about the code is a workaround?

@classmethod
def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
if issubclass(otherCls, FloatImmediate):
return cls.typeFraction >= otherCls.typeFraction and cls.typeExponent >= otherCls.typeExponent
else:
return False

@classmethod
def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_NetworkContext] = None):
"""
This method tries to manually cast standard python's standard immediate float precision values
(64 bits) to an arbitrary FP representation and check if the new representation is close enough
to the original value.
"""
_val_list = []

if isinstance(value, float):
_val_list.append(value)
elif isinstance(value, np.ndarray):
_val_list = value.tolist()
elif isinstance(value, Iterable):
for i in value:
_val_list.append(i)
else:
raise Exception("Immediate type not recognized.")

for val in _val_list:
# Zero (and subnormals, not implemented) are special cases
if (val == 0):
continue
# Make the value positive
if (val < 0):
val = val * -1

# Separate Integer and Fraction of immediate
fraction, integer = math.modf(val)

# Binarylist for the mantissa
binarylist = []
f = fraction

# Fraction binarization; fails if the number of bits required exceeds the mantissa width.
# If the integer part of the immediate is 0, we start counting mantissa bits after the first 1 bit.
if (int(integer) > 0):
for i in range(cls.typeFraction):
f = f * 2
f, fint = math.modf(f)
binarylist.append(str(int(fint)))
if f == 0:
break
elif i == (cls.typeFraction - 1):
return False
else:
flag = 0
count = cls.typeFraction + 1
while (count):
f = f * 2
f, fint = math.modf(f)
binarylist.append(str(int(fint)))
if int(fint) == 1 and flag == 0:
flag = 1
if f == 0:
break
if flag == 1:
count = count - 1
if (count == 0):
return False
Review comment (Collaborator), on lines +295 to +320: All of this float to string to list to int casting seems unnecessary to me. Please use a builtin method to determine the number of mantissa and exponent bits, e.g. frexp (see https://www.tutorialspoint.com/c_standard_library/c_function_frexp.htm).
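For reference, Python exposes the same primitive as math.frexp; a minimal standalone sketch of the reviewer's suggestion (a hypothetical helper, not the PR's implementation, and handling normal numbers only):

```python
import math

def is_representable(val: float, type_fraction: int, type_exponent: int) -> bool:
    # Hypothetical frexp-based variant of checkValue (normals only).
    if val == 0.0:
        return True
    mantissa, exp = math.frexp(abs(val))  # val = mantissa * 2**exp, 0.5 <= mantissa < 1
    bias = 2**(type_exponent - 1) - 1
    biased_exponent = (exp - 1) + bias    # exponent of the normalized 1.f * 2**(exp - 1) form
    if not 0 < biased_exponent <= 2**type_exponent - 2:  # all-zeros/all-ones are reserved
        return False
    # Shift the fraction bits into the integer range; the value is exactly
    # representable iff nothing remains beyond type_fraction fraction bits.
    scaled = mantissa * 2.0**(type_fraction + 1)
    return scaled == int(scaled)
```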


# Float exponent part
# It equals the bit length of the integer part minus 1, if the integer part is non-zero.
# Otherwise, it is minus (the number of leading 0 bits in the fraction representation plus 1).
exponent = 0
if (int(bin(int(integer))[2:]) == 0):
for b in binarylist:
exponent = exponent - 1
if b == '1':
break
else:
exponent = len(str(bin(int(integer))[2:])) - 1

# Check if exponent is representable in n_exponent bits
true_exponent = int(bin(cls.typeExponentOffset + exponent)[2:])
if (cls.typeExponentOffset + exponent) > cls.typeExponentMax or (cls.typeExponentOffset + exponent) < 0:
return False

# Append the integer part's bits (below the leading 1) to the head of the mantissa, if the integer part has more than one bit
binarylist2 = []
if len(str(bin(int(integer))[2:])) > 1:
for digit in str(bin(int(integer))[3:]):
binarylist2.append((digit))

# If the integer part is zero, trim the leading mantissa bits that were used to compute the exponent
if (int(integer) > 0):
finalbinaryfraction = binarylist2 + binarylist
else:
finalbinaryfraction = binarylist
while (finalbinaryfraction[0] == '0'):
finalbinaryfraction.pop(0)
finalbinaryfraction.pop(0)

# Fix mantissa size
if ((cls.typeFraction - len(finalbinaryfraction)) > 0):
finalbinaryfraction += ['0'] * (cls.typeFraction - len(finalbinaryfraction))
if (len(finalbinaryfraction) > cls.typeFraction):
finalbinaryfraction = finalbinaryfraction[:cls.typeFraction]

# Check whether the binary float representation reproduces the immediate value
exponent_part = 2**exponent
mantissa_part = 1
for (i, m) in enumerate(finalbinaryfraction):
mantissa_part = mantissa_part + 2**(-(i + 1)) * int(m)
if (exponent_part * mantissa_part != val):
return False

return True
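
As a usage illustration (relying on the concrete bfloat16 and float32 classes added later in this PR): 1.5 is exactly representable with 7 fraction bits, while 0.1 has no finite binary expansion and is rejected.

```python
from Deeploy.CommonExtensions.DataTypes import bfloat16, float32

assert bfloat16.checkValue(1.5)      # 1.5 = 1.1b * 2^0 fits in 7 fraction bits
assert not bfloat16.checkValue(0.1)  # 0.1 has no finite binary expansion
assert float32.checkValue(0.25)      # an exact power of two
```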


class Pointer(BaseType[Optional[str], _PointerType]):
"""Represents a C Pointer type to an underlying BaseType data type
"""
Deeploy/CommonExtensions/DataTypes.py (18 additions, 1 deletion)

@@ -25,7 +25,7 @@

from typing import Tuple, Type

from Deeploy.AbstractDataTypes import IntegerImmediate
from Deeploy.AbstractDataTypes import FloatImmediate, IntegerImmediate


class int8_t(IntegerImmediate):
@@ -76,10 +76,27 @@ class uint64_t(IntegerImmediate):
signed = False


class bfloat16(FloatImmediate):
typeName = "float16alt"
Review comment (Collaborator): I would suggest keeping the typeName equal to the name of the class to avoid confusion.

typeWidth = 16
typeFraction = 7
typeExponent = 8
signed = True


class float32(FloatImmediate):
typeName = "float"
typeWidth = 32
typeFraction = 23
typeExponent = 8
signed = True


SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t)
UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t)
IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (sorted((
*SignedIntegerDataTypes,
*UnsignedIntegerDataTypes,
),
key = lambda _type: _type.typeWidth))
FloatDataTypes: Tuple[Type[FloatImmediate], ...] = (bfloat16, float32)
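
A quick sanity check of the upcast partial order defined in AbstractDataTypes.py: float32 covers bfloat16 (23 >= 7 fraction bits, 8 >= 8 exponent bits), but not the other way around.

```python
from Deeploy.CommonExtensions.DataTypes import bfloat16, float32

assert float32.partialOrderUpcast(bfloat16)      # 23 >= 7 and 8 >= 8
assert not bfloat16.partialOrderUpcast(float32)  # 7 < 23 fraction bits
```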
Deeploy/DeeployTypes.py (6 additions, 1 deletion)

@@ -45,7 +45,7 @@
from onnx.external_data_helper import convert_model_to_external_data
from ortools.constraint_solver.pywrapcp import IntVar

from .AbstractDataTypes import BaseType, IntegerImmediate, Pointer, PointerClass, Struct, VoidType
from .AbstractDataTypes import BaseType, FloatImmediate, IntegerImmediate, Pointer, PointerClass, Struct, VoidType

Shape = TypeVar("Shape", bound = Any)
SubGraph = List[gs.Node]
@@ -1903,11 +1903,16 @@ def _broadcastInteger(ty: Type[IntegerImmediate]):
else:
return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))

def _broadcastFloat(ty: Type[FloatImmediate]):
return np.dtype(getattr(np, "double"))

if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
if issubclass(ty.referencedType, IntegerImmediate):
return _broadcastInteger(ty.referencedType)
elif issubclass(ty, IntegerImmediate):
return _broadcastInteger(ty)
elif issubclass(ty, FloatImmediate):
return _broadcastFloat(ty)

return None

Deeploy/Targets/Generic/Bindings.py (8 additions, 4 deletions)

@@ -30,17 +30,18 @@
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration, MemoryPassthroughGeneration
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, int8_t, int32_t, uint8_t
from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, int8_t, \
int32_t, uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
DummyTemplate, DWConvTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
DummyTemplate, DWConvTemplate, FloatAddTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, ReduceMeanTemplate, \
ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
DummyChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, MaxPoolChecker, \
MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \
MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker, iLayerNormChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])
@@ -65,6 +66,9 @@
AddTemplate.referenceTemplate, BasicTransformer)
for type1 in IntegerDataTypes
for type2 in IntegerDataTypes
] + [
NodeBinding(FloatAddChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]),
FloatAddTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes
]
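
The new comprehension instantiates one binding per float type; with FloatDataTypes = (bfloat16, float32) from above, it expands to the equivalent of the following (illustrative sketch only, not literal PR code):

```python
float_add_bindings = [
    NodeBinding(FloatAddChecker([PointerClass(bfloat16), PointerClass(bfloat16)], [PointerClass(bfloat16)]),
                FloatAddTemplate.referenceTemplate, BasicTransformer),
    NodeBinding(FloatAddChecker([PointerClass(float32), PointerClass(float32)], [PointerClass(float32)]),
                FloatAddTemplate.referenceTemplate, BasicTransformer),
]
```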

BasicConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
Deeploy/Targets/Generic/Templates/FloatAddTemplate.py (62 additions, 0 deletions)

@@ -0,0 +1,62 @@
# ----------------------------------------------------------------------
#
# File: FloatAddTemplate.py
#
# Last edited: 15.12.2021
#
# Copyright (C) 2021, ETH Zurich and University of Bologna.
#
# Author: Moritz Scherer, ETH Zurich
#
# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _FloatAddTemplate(NodeTemplate):

def alignToContext(self, ctxt: NetworkContext,
operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:

data_in_1 = ctxt.lookup(operatorRepresentation['data_in_1'])
data_in_2 = ctxt.lookup(operatorRepresentation['data_in_2'])
data_out = ctxt.lookup(operatorRepresentation['data_out'])

input_1_offset = 0
if hasattr(data_in_1, "_signed") and hasattr(data_in_1, "nLevels"):
input_1_offset = (data_in_1._signed == 0) * int(data_in_1.nLevels / 2)
input_2_offset = 0
if hasattr(data_in_2, "_signed") and hasattr(data_in_2, "nLevels"):
input_2_offset = (data_in_2._signed == 0) * int(data_in_2.nLevels / 2)
output_offset = 0
if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"):
output_offset = -(data_out._signed == 0) * int(data_out.nLevels // 2)

operatorRepresentation['offset'] = input_1_offset + input_2_offset + output_offset

return ctxt, operatorRepresentation, []
Review comment (Collaborator), on lines +40 to +52: I don't quite understand the use of nLevels and offset here; those abstractions don't seem to make sense to me for floating point arithmetic.



referenceTemplate = _FloatAddTemplate("""
// Add (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE
for (uint32_t i=0;i<${size};i++){
${data_out}[i] = ${data_in_1}[i] + ${data_in_2}[i] + ${offset};
}
END_SINGLE_CORE
""")
Deeploy/Targets/Generic/TypeCheckers.py (14 additions, 0 deletions)

@@ -125,6 +125,20 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
return [False]


class FloatAddChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
super().__init__(input_types, output_types)

def _inferNumLevels(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> List[int]:
return [inputs[0].nLevels + inputs[1].nLevels]

def _inferSignedness(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> List[bool]:
return [True]


class GatherChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
DeeployTest/Tests/FloatAdder/activations.npz (binary file added, not shown)
DeeployTest/Tests/FloatAdder/inputs.npz (binary file added, not shown)
DeeployTest/Tests/FloatAdder/network.onnx (30 additions, 0 deletions)

@@ -0,0 +1,30 @@
(Binary ONNX protobuf rendered as text: a minimal graph exported by onnxruntime.transformers 1.16.1 from torch-jit-export, containing a single Add node (Add_0) with inputs `input` and `onnx::Add_1` and output `output`, plus opset import entries for ai.onnx.ml, ai.onnx.training, com.ms.internal.nhwc, ai.onnx.preview.training, com.microsoft, com.microsoft.experimental, com.microsoft.nchwc, and org.pytorch.aten.)
DeeployTest/Tests/FloatAdder/outputs.npz (binary file added, not shown)
DeeployTest/generateNetwork.py (3 additions, 3 deletions)

@@ -76,9 +76,9 @@
test_inputs, test_outputs, graph = generateDebugConfig(inputs, outputs, activations, graph)

else:
# Load as int64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.int64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.int64) for x in outputs.files]
# Load as float64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.float64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.float64) for x in outputs.files]

# WIESEP: Hack to get CI running because only one specific array is used
if "WaveFormer" in args.dir: