0.6.3 Release (#41)
Final update for the PPQ 0.6.3 release
* Added Metax model export
* Added conversion of the Clip operator from opset 9 to opset 11 (see the sketch after the ppq/IR/morph.py diff below)
* Changed the logger's output format
* Fixed a bug in the caffe exporter when exporting the Slice operator
* Fixed a bug in the equalization algorithm
* Fixed several bugs in the LSQ algorithm
* Added a guard condition to graphwise error analysis
* Added new operator adaptation types for the SSD algorithm
ZhangZhiPku authored Mar 30, 2022
1 parent 7cdc22f commit 6c71adb
Showing 11 changed files with 327 additions and 201 deletions.
36 changes: 17 additions & 19 deletions ppq/IR/morph.py
@@ -187,32 +187,30 @@ def format_clip(self) -> None:
min, max parameters will be given by the second and third input variables
2. min, max parameters will be given by attributes
this function unifies the behavior of Clip ops: min, max parameters will be
given by operation.attributes
this function unifies the behavior of Clip ops: min, max parameters will be
given by the second and third input variables
if min or max is missing, it is set directly to 2 << 30 (guaranteed to be
non-null after processing)
when min, max are given by the second and third input variables, a
ValueError will be raised if either of them is null
"""

interested_ops = []
for _, operation in self.graph.operations.items():
if operation.type == 'Clip': interested_ops.append(operation)
for operation in interested_ops:
assert isinstance(operation, Operation)
if len(operation.inputs) == 3:
min_constant_op, max_constant_op = [var.source_op for var in operation.inputs[1:]]
min = convert_any_to_python_primary_type(min_constant_op.attributes['value'])
max = convert_any_to_python_primary_type(max_constant_op.attributes['value'])
self.__delete_constant_input(operation, 2)
self.__delete_constant_input(operation, 1)
elif len(operation.inputs) == 1:
min = operation.attributes.get('min', - 2 << 30)
max = operation.attributes.get('max', + 2 << 30)
else:
raise ValueError(f'Expect clip has 1 or 3 inputs, while {len(operation.inputs)} was given')
operation.attributes['min'] = min
operation.attributes['max'] = max
if operation.type == 'Clip' and ('min' in operation.attributes or 'max' in operation.attributes):
interested_ops.append(operation)
for op in interested_ops:
assert isinstance(op, Operation)
min = op.attributes.get('min', - 2 << 30)
max = op.attributes.get('max', + 2 << 30)
min_var = Variable(name=op.name + '_min', value=min, is_parameter=True, dest_ops=[op])
max_var = Variable(name=op.name + '_max', value=max, is_parameter=True, dest_ops=[op])
self.graph.append_variable(min_var)
self.graph.append_variable(max_var)
op.inputs.append(min_var)
op.inputs.append(max_var)
if 'min' in op.attributes: op.attributes.pop('min')
if 'max' in op.attributes: op.attributes.pop('max')

def format_gather(self) -> None:
"""
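The rewrite above inverts format_clip: it now moves Clip bounds from the
opset-9 style ('min'/'max' attributes) to the opset-11 style (second and
third inputs). A minimal, self-contained sketch of that transformation
follows; ToyOp and ToyVar are illustrative stand-ins for PPQ's Operation and
Variable, not real PPQ classes.

class ToyVar:
    def __init__(self, name, value=None, is_parameter=False):
        self.name, self.value, self.is_parameter = name, value, is_parameter

class ToyOp:
    def __init__(self, name, op_type, attributes):
        self.name, self.type, self.attributes = name, op_type, attributes
        self.inputs = [ToyVar(name + '_input')]

def format_clip(op):
    # only attribute-style Clip ops need rewriting
    if op.type != 'Clip' or not ('min' in op.attributes or 'max' in op.attributes):
        return
    # missing bounds default to +/- (2 << 30), as in the code above
    lo = op.attributes.pop('min', -(2 << 30))
    hi = op.attributes.pop('max', +(2 << 30))
    op.inputs.append(ToyVar(op.name + '_min', lo, is_parameter=True))
    op.inputs.append(ToyVar(op.name + '_max', hi, is_parameter=True))

clip = ToyOp('clip0', 'Clip', {'min': 0.0, 'max': 6.0})   # a ReLU6-style clip
format_clip(clip)
print([v.name for v in clip.inputs])   # ['clip0_input', 'clip0_min', 'clip0_max']
print(clip.attributes)                 # {}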
5 changes: 5 additions & 0 deletions ppq/api/interface.py
@@ -65,6 +65,8 @@
TargetPlatform.EXTENSION: ExtensionExporter,
# TargetPlatform.ORT_OOS_INT8: ONNXRUNTIMExporter,
TargetPlatform.ORT_OOS_INT8: ORTOOSExporter,
TargetPlatform.METAX_INT8_C: ONNXRUNTIMExporter,
TargetPlatform.METAX_INT8_T: ONNXRUNTIMExporter,
}

# 为你的导出模型取一个好听的后缀名
@@ -81,6 +83,8 @@
TargetPlatform.NATIVE: '.native',
TargetPlatform.EXTENSION: '.ext',
TargetPlatform.ORT_OOS_INT8: '.onnx',
TargetPlatform.METAX_INT8_C: '.onnx',
TargetPlatform.METAX_INT8_T: '.onnx',
}

def load_graph(file_path: str, from_framework: NetworkFramework=NetworkFramework.ONNX, **kwargs) -> BaseGraph:
@@ -529,6 +533,7 @@ def format_graph(graph: BaseGraph) -> BaseGraph:
formatter(GraphCommand(GraphCommandType.FORMAT_PARAMETERS))
formatter(GraphCommand(GraphCommandType.FORMAT_CAST))
formatter(GraphCommand(GraphCommandType.FORMAT_SLICE))
formatter(GraphCommand(GraphCommandType.FORMAT_CLIP))
formatter(GraphCommand(GraphCommandType.DELETE_ISOLATED))

return graph
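With the two Metax platforms registered above, export routes through
ONNXRUNTIMExporter and writes an '.onnx' file. A hedged usage sketch follows;
argument names are taken from common PPQ examples and may differ between
versions, and quantized stands for a graph produced by PPQ quantization.

from ppq.api import export_ppq_graph
from ppq.core import TargetPlatform

export_ppq_graph(
    graph=quantized,                           # assumed: a quantized BaseGraph
    platform=TargetPlatform.METAX_INT8_C,      # routed to ONNXRUNTIMExporter above
    graph_save_to='model_metax.onnx')          # matches the '.onnx' suffix above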
2 changes: 1 addition & 1 deletion ppq/csrc/cuda/train.cu
@@ -335,4 +335,4 @@ __host__ Tensor RoundingLoss_LC_B(
clip_min, clip_max, rounding, PTR<float>(grad_v)
);
return grad_v;
}
}
9 changes: 9 additions & 0 deletions ppq/log/logger.py
@@ -128,7 +128,16 @@ def error(self, msg:str):
handler.process(msg, LEVEL.ERROR)
else:
handler.process(print_msg, LEVEL.ERROR)

def debug(self, msg: str):
msg = self.wrap_header(msg, 'DEBUG')
print_msg = G_BEGIN + msg + COLOR_END

for handler in self._handlers.values():
if handler._file_name is not None:
handler.process(msg, LEVEL.DEBUG)
else:
handler.process(print_msg, LEVEL.DEBUG)

def register_handler(self, file_name: str, level: Union[str, LEVEL]=LEVEL.INFO):
if file_name not in self._handlers:
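The new debug method mirrors the error path above it: messages print
colorized to the console and go as plain text to any file handler registered
at DEBUG level. A small usage sketch based on the methods visible in this
diff; the import paths and the get_logger factory are assumptions and may
differ in this version.

from ppq.log import NaiveLogger            # imported this way elsewhere in this commit
from ppq.log.logger import LEVEL           # assumption: LEVEL enum lives in this module

logger = NaiveLogger.get_logger('PPQ')     # assumption: factory name may differ
logger.register_handler('quantize.log', level=LEVEL.DEBUG)  # file handler at DEBUG
logger.debug('calibration step finished')  # colorized on console, plain in the file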
72 changes: 66 additions & 6 deletions ppq/parser/caffe_exporter.py
@@ -10,7 +10,7 @@
from ppq.executor.torch import TorchExecutor
from ppq.IR import BaseGraph, GraphExporter
from ppq.IR.morph import GraphDeviceSwitcher
from ppq.IR.quantize import QuantableOperation
from ppq.IR.quantize import QuantableOperation, QuantableVariable
from ppq.log import NaiveLogger

from .caffe import ppl_caffe_pb2
@@ -260,8 +260,38 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, inpu

# step - 1, find corresponding op
if layer_name not in graph.operations:
raise KeyError(f'Can not find operation {layer_name} with current graph.')

# PATCH FOR Slice
if layer.type == 'Slice':
# slice0 --> (ppq parse, export, combine) --> slice0_0_0, everything else
# is the same with original model
# layer_name is slice0_0_0, obtain original_name slice0
original_name = ''.join(layer_name.split('_')[:-2])
for bottom in layer.bottom:
var = graph.variables.get(bottom, None)
if var is not None and isinstance(var, QuantableVariable) and not var.is_parameter:
cfg = None
for dest_op, dest_cfg in zip(var.dest_ops, var.dest_op_configs):
# dest_op.name is slice0_0
if ''.join(dest_op.name.split('_')[:-1]) == original_name:
cfg = dest_cfg
break
assert cfg is not None
qt_min = convert_value(cfg.scale * (cfg.quant_min - cfg.offset), True, DataType.FP32)
qt_max = convert_value(cfg.scale * (cfg.quant_max - cfg.offset), True, DataType.FP32)
layer.quantize_param.add(type='bottom', range_min=qt_min, range_max=qt_max)

for top in layer.top:
var = graph.variables.get(top, None)
if var is not None and isinstance(var, QuantableVariable) and not var.is_parameter:
cfg = var.source_op_config
assert cfg is not None
qt_min = convert_value(cfg.scale * (cfg.quant_min - cfg.offset), True, DataType.FP32)
qt_max = convert_value(cfg.scale * (cfg.quant_max - cfg.offset), True, DataType.FP32)
layer.quantize_param.add(type='top', range_min=qt_min, range_max=qt_max)
continue
else:
raise KeyError(f'Can not find operation {layer_name} with current graph.')

op = graph.operations[layer_name]
if not isinstance(op, QuantableOperation): continue

@@ -371,10 +401,40 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, \
# layer is a caffe data structure, corresponding to operation in ppq.
# following code write ppq quantization configuration to caffe layer.

# step - 1, find corresponding op
# step - 1, find corresponding op
if layer_name not in graph.operations:
raise KeyError(f'Can not find operation {layer_name} with current graph.')

# PATCH FOR Slice
if layer.type == 'Slice':
# slice0 --> (ppq parse, export, combine) --> slice0_0_0, everything else
# is the same with original model
# layer_name is slice0_0_0, obtain original_name slice0
original_name = ''.join(layer_name.split('_')[:-2])
for bottom in layer.bottom:
var = graph.variables.get(bottom, None)
if var is not None and isinstance(var, QuantableVariable) and not var.is_parameter:
cfg = None
for dest_op, dest_cfg in zip(var.dest_ops, var.dest_op_configs):
# dest_op.name is slice0_0
if ''.join(dest_op.name.split('_')[:-1]) == original_name:
cfg = dest_cfg
break
assert cfg is not None
qt_min = convert_value(cfg.scale * (cfg.quant_min - cfg.offset), True, DataType.FP32)
qt_max = convert_value(cfg.scale * (cfg.quant_max - cfg.offset), True, DataType.FP32)
layer.quantize_param.add(type='bottom', range_min=qt_min, range_max=qt_max)

for top in layer.top:
var = graph.variables.get(top, None)
if var is not None and isinstance(var, QuantableVariable) and not var.is_parameter:
cfg = var.source_op_config
assert cfg is not None
qt_min = convert_value(cfg.scale * (cfg.quant_min - cfg.offset), True, DataType.FP32)
qt_max = convert_value(cfg.scale * (cfg.quant_max - cfg.offset), True, DataType.FP32)
layer.quantize_param.add(type='top', range_min=qt_min, range_max=qt_max)
continue
else:
raise KeyError(f'Can not find operation {layer_name} with current graph.')

op = graph.operations[layer_name]
if not isinstance(op, QuantableOperation): continue

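Both export paths patched above rely on the same two steps: undoing the name
mangling PPQ applies to Slice layers and turning a quantization config into a
float range. A self-contained sketch of that arithmetic, with illustrative
values:

def original_name(mangled):
    # 'slice0' -> (ppq parse, export, combine) -> 'slice0_0_0'
    # ''.join drops the separators, so this assumes the original layer
    # name itself contains no underscores
    return ''.join(mangled.split('_')[:-2])

def quant_range(scale, offset, quant_min, quant_max):
    # dequantized bounds of the representable integer range
    return scale * (quant_min - offset), scale * (quant_max - offset)

print(original_name('slice0_0_0'))      # slice0
print(quant_range(0.1, 128, 0, 255))    # (-12.8, 12.7)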
8 changes: 6 additions & 2 deletions ppq/quantization/algorithm/equalization.py
@@ -194,7 +194,7 @@ def layerwise_equalize(

elif downstream_layer.type == 'Gemm':
weight, bias = self.get_linear_params(downstream_layer, False)
weight *= torch.reshape(scale, (1, -1))
weight /= torch.reshape(scale, (1, -1))

self.set_linear_params(downstream_layer, bias, weight)

@@ -258,6 +258,8 @@ def get_linear_params(self, linear: Operation, including_bias: bool):
if including_bias and len(linear.parameters) > 1:
bias = linear.parameters[1].value

if not linear.attributes.get('transB', 0):
weight = torch.transpose(weight, 1, 0)
if bias is not None: return weight, bias
else: return [weight, None]

@@ -285,7 +287,9 @@ def set_linear_params(self, linear: Operation, bias: torch.Tensor, weight: torch

assert linear.type == 'Gemm', (
'Except input object with type Gemm, but %s got' % linear.type)


if not linear.attributes.get('transB', 0):
weight = torch.transpose(weight, 1, 0)
linear.parameters[0].value = weight
if bias is not None and len(linear.parameters) > 1:
linear.parameters[1].value = bias
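The new transB guard matters because ONNX Gemm computes Y = A * B when
transB=0 and Y = A * B^T when transB=1: the stored weight is [in, out] in the
first case and [out, in] in the second. Since equalization scales a fixed
channel axis, get_linear_params and set_linear_params now normalize the
layout first. A minimal sketch of that normalization, assuming torch:

import torch

def canonical_weight(weight, transB):
    # canonical layout: [out_features, in_features], i.e. the transB=1 layout
    return weight if transB else torch.transpose(weight, 1, 0)

w_nt = torch.randn(128, 64)   # transB=0: stored as [in_features, out_features]
w_t  = torch.randn(64, 128)   # transB=1: stored as [out_features, in_features]
assert canonical_weight(w_nt, 0).shape == canonical_weight(w_t, 1).shape   # both (64, 128)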
11 changes: 6 additions & 5 deletions ppq/quantization/algorithm/training.py
@@ -643,9 +643,9 @@ def __init__(self,
self.passive = config.state == QuantizationStates.PASSIVE
self.scale_multiplier = scale_multiplier
self.scale = torch.nn.Parameter(convert_any_to_torch_tensor(config.scale, device=device,\
dtype=torch.float32), requires_grad=True)
dtype=torch.float32).clone(), requires_grad=True)
self.bias = torch.nn.Parameter(convert_any_to_torch_tensor(config.offset, device=device,\
dtype=torch.float32) * self.scale.detach(), requires_grad=True)
dtype=torch.float32).clone() * self.scale.detach(), requires_grad=True)
self._masters = []

@ property
@@ -669,7 +669,8 @@ def finalize(self) -> None:
if self.config.dominated_by == self.config:
if not self.passive:
self.config.scale = self.scale.data.abs()
self.config.offset = self.bias.data / self.scale.data.abs()
self.config.offset = torch.clamp(self.bias.data.abs() / self.scale.data.abs(), \
self.config.quant_min, self.config.quant_max)
else:
# bias
scale = self.scale_multiplier
@@ -705,7 +706,7 @@ def __call__(self, tensor: torch.Tensor, config: TensorQuantizationConfig) -> to

# only bias doesn't need offset in asym quant
if not self.passive and config.policy.has_property(QuantizationProperty.ASYMMETRICAL):
tensor = tensor + bias
tensor = tensor + bias.abs()

scale = scale.abs()
tensor = tensor / scale
@@ -715,7 +716,7 @@ def __call__(self, tensor: torch.Tensor, config: TensorQuantizationConfig) -> to
tensor = tensor * scale

if not self.passive and config.policy.has_property(QuantizationProperty.ASYMMETRICAL):
tensor = tensor - bias
tensor = tensor - bias.abs()
return tensor


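In __call__ above, asymmetric fake quantization shifts the tensor by |bias|,
divides by |scale|, rounds and clamps (the round/clamp step falls in lines
elided from this hunk), then inverts both steps. A numeric sketch with
illustrative values standing in for config.quant_min and config.quant_max:

import torch

def fake_quant(x, scale, bias, quant_min=0, quant_max=255):
    x = (x + bias.abs()) / scale.abs()
    x = torch.clamp(torch.round(x), quant_min, quant_max)
    return x * scale.abs() - bias.abs()

x = torch.tensor([-1.0, 0.0, 0.5, 30.0])
print(fake_quant(x, torch.tensor(0.1), torch.tensor(12.8)))
# tensor([-1.0000,  0.0000,  0.5000, 12.7000]) -- 30.0 saturates at quant_max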
3 changes: 3 additions & 0 deletions ppq/quantization/analyise/graphwise.py
@@ -95,6 +95,9 @@ def graphwise_error_analyse(
interested_op = [operation for operation in graph.operations.values()
if (isinstance(operation, QuantableOperation) and
operation.config.output_quantization_config[0].state == QuantizationStates.ACTIVATED)]
if len(interested_op) == 0:
print('Oops. you got nothing to analyse.')
return

# set up all hooks.
recorders, hooks, caches = {}, {}, {}