From b7024ddfe5b435a7aa481f81a9449dc485fd71b4 Mon Sep 17 00:00:00 2001
From: RunningLeon <mnsheng@yeah.net>
Date: Fri, 25 Aug 2023 10:18:05 +0800
Subject: [PATCH] [Feature]: Support deployment of panoptic segmentation models
 (#2347)

* add semantic segmentation head

 Author:    Daigo Hirooka <daigo.hirooka@gmail.com>
 Date:      Sun Jun 26 18:25:29 2022 +0900

* add panoptic detection model

* add panoptic segmentation configs

* support panoptic-fpn

* remove interpolate

* update

* support panoptic-fpn mask2former maskformer

* update

* support dynamic

* update

* remove unused rewritings for mask2former

* Revert "remove unused rewritings for mask2former"

This reverts commit 2b6d24ae5fe910b473438e7c780513e31e0c52e1.

* update configs and regs

* debug dynamic

* fix for panoptic-fpn

* update

* remove rewritings for mask2former

* update reg test config

* fix

* update docs

* fix comments

* fix

---------

Co-authored-by: Daigo Hirooka <daigo.hirooka@gmail.com>
---
 .github/scripts/prepare_reg_test.py           |   6 +
 .../mmdet/_base_/base_panoptic-seg_static.py  |  15 +
 ...ptic-seg_maskformer_onnxruntime_dynamic.py |  20 ++
 ..._maskformer_onnxruntime_static-800x1344.py |   8 +
 ...rmer_tensorrt_dynamic-320x512-1344x1344.py |  27 ++
 ...seg_maskformer_tensorrt_static-800x1344.py |  19 ++
 ...ic-seg_panoptic-fpn_onnxruntime_dynamic.py |  32 +++
 ...-fpn_tensorrt_dynamic-352x512-1344x1344.py |  43 +++
 docs/en/04-supported-codebases/mmdet.md       |  57 ++--
 docs/zh_cn/04-supported-codebases/mmdet.md    |  56 ++--
 mmdeploy/apis/onnx/export.py                  |   9 +-
 .../mmdet/deploy/object_detection_model.py    | 258 ++++++++++++++----
 mmdeploy/codebase/mmdet/models/__init__.py    |   1 +
 .../mmdet/models/detectors/__init__.py        |   6 +-
 .../mmdet/models/detectors/maskformer.py      |  54 ++++
 .../detectors/panoptic_two_stage_segmentor.py |  65 +++++
 .../mmdet/models/detectors/single_stage.py    |   4 -
 .../mmdet/models/detectors/two_stage.py       |   2 -
 .../mmdet/models/seg_heads/__init__.py        |   4 +
 .../models/seg_heads/base_semantic_head.py    |  31 +++
 mmdeploy/mmcv/ops/multi_scale_deform_attn.py  |   1 +
 tests/regression/mmdet.yml                    |  40 ++-
 22 files changed, 652 insertions(+), 106 deletions(-)
 create mode 100644 configs/mmdet/_base_/base_panoptic-seg_static.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py
 create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py
 create mode 100644 mmdeploy/codebase/mmdet/models/detectors/maskformer.py
 create mode 100644 mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py
 create mode 100644 mmdeploy/codebase/mmdet/models/seg_heads/__init__.py
 create mode 100644 mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py

diff --git a/.github/scripts/prepare_reg_test.py b/.github/scripts/prepare_reg_test.py
index 96065ef3e6..e7f6e67751 100644
--- a/.github/scripts/prepare_reg_test.py
+++ b/.github/scripts/prepare_reg_test.py
@@ -98,6 +98,12 @@ def prepare_codebases(codebases):
                             f'{MMDEPLOY_DIR}/configs/mmyolo')
             shutil.copy(f'{target_dir}/tests/regression/mmyolo.yml',
                         f'{MMDEPLOY_DIR}/tests/regression/mmyolo.yml')
+        elif codebase == 'mmdet':
+            # install panopticapi for panoptic segmentation
+            run_cmd([
+                'python -m pip install ',
+                'git+https://github.com/cocodataset/panopticapi.git',
+            ])
 
 
 def install_torch(torch_version):
diff --git a/configs/mmdet/_base_/base_panoptic-seg_static.py b/configs/mmdet/_base_/base_panoptic-seg_static.py
new file mode 100644
index 0000000000..bc117ce6ae
--- /dev/null
+++ b/configs/mmdet/_base_/base_panoptic-seg_static.py
@@ -0,0 +1,15 @@
+_base_ = ['../../_base_/onnx_config.py']
+
+codebase_config = dict(
+    type='mmdet',
+    task='ObjectDetection',
+    model_type='panoptic_end2end',
+    post_processing=dict(
+        export_postprocess_mask=False,
+        score_threshold=0.0,
+        iou_threshold=0.5,
+        max_output_boxes_per_class=200,
+        pre_top_k=5000,
+        keep_top_k=100,
+        background_label_id=-1,
+    ))
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py
new file mode 100644
index 0000000000..5ccd7794e2
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py
@@ -0,0 +1,20 @@
+_base_ = [
+    './panoptic-seg_maskformer_onnxruntime_static-800x1344.py',
+]
+onnx_config = dict(
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'cls_logits': {
+            0: 'batch',
+        },
+        'mask_logits': {
+            0: 'batch',
+            2: 'h',
+            3: 'w',
+        },
+    },
+    input_shape=None)
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py
new file mode 100644
index 0000000000..5ee9077731
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py
@@ -0,0 +1,8 @@
+_base_ = [
+    '../_base_/base_panoptic-seg_static.py',
+    '../../_base_/backends/onnxruntime.py'
+]
+onnx_config = dict(
+    opset_version=13,
+    output_names=['cls_logits', 'mask_logits'],
+    input_shape=[1344, 800])
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py
new file mode 100644
index 0000000000..8dcddabc2b
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py
@@ -0,0 +1,27 @@
+_base_ = ['./panoptic-seg_maskformer_tensorrt_static-800x1344.py']
+onnx_config = dict(
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'cls_logits': {
+            0: 'batch',
+        },
+        'mask_logits': {
+            0: 'batch',
+            2: 'h',
+            3: 'w',
+        },
+    },
+    input_shape=None)
+
+backend_config = dict(model_inputs=[
+    dict(
+        input_shapes=dict(
+            input=dict(
+                min_shape=[1, 3, 320, 512],
+                opt_shape=[1, 3, 800, 1344],
+                max_shape=[1, 3, 1344, 1344])))
+])
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py
new file mode 100644
index 0000000000..8cfe30734a
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py
@@ -0,0 +1,19 @@
+_base_ = [
+    '../_base_/base_panoptic-seg_static.py',
+    '../../_base_/backends/tensorrt.py'
+]
+onnx_config = dict(
+    opset_version=13,
+    output_names=['cls_logits', 'mask_logits'],
+    input_shape=[1344, 800])
+
+backend_config = dict(
+    common_config=dict(max_workspace_size=1 << 30),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                input=dict(
+                    min_shape=[1, 3, 800, 1344],
+                    opt_shape=[1, 3, 800, 1344],
+                    max_shape=[1, 3, 800, 1344])))
+    ])
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py
new file mode 100644
index 0000000000..88bf4944f7
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py
@@ -0,0 +1,32 @@
+_base_ = [
+    '../_base_/base_panoptic-seg_static.py',
+    '../../_base_/backends/onnxruntime.py'
+]
+onnx_config = dict(
+    input_shape=None,
+    output_names=['dets', 'labels', 'masks', 'semseg'],
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'dets': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'labels': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'masks': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'semseg': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+    },
+)
diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py
new file mode 100644
index 0000000000..79f50ab512
--- /dev/null
+++ b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py
@@ -0,0 +1,43 @@
+_base_ = [
+    '../_base_/base_panoptic-seg_static.py',
+    '../../_base_/backends/tensorrt.py'
+]
+onnx_config = dict(
+    input_shape=None,
+    output_names=['dets', 'labels', 'masks', 'semseg'],
+    dynamic_axes={
+        'input': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+        'dets': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'labels': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'masks': {
+            0: 'batch',
+            1: 'num_dets',
+        },
+        'semseg': {
+            0: 'batch',
+            2: 'height',
+            3: 'width'
+        },
+    },
+)
+
+backend_config = dict(
+    common_config=dict(max_workspace_size=1 << 30),
+    model_inputs=[
+        dict(
+            input_shapes=dict(
+                input=dict(
+                    min_shape=[1, 3, 352, 512],
+                    opt_shape=[1, 3, 800, 1344],
+                    max_shape=[1, 3, 1344, 1344])))
+    ])
diff --git a/docs/en/04-supported-codebases/mmdet.md b/docs/en/04-supported-codebases/mmdet.md
index 847c9311a4..84e1fe5922 100644
--- a/docs/en/04-supported-codebases/mmdet.md
+++ b/docs/en/04-supported-codebases/mmdet.md
@@ -10,6 +10,7 @@
     - [Backend model inference](#backend-model-inference)
     - [SDK model inference](#sdk-model-inference)
   - [Supported models](#supported-models)
+  - [Reminder](#reminder)
 
 ______________________________________________________________________
 
@@ -189,27 +190,35 @@ Besides python API, mmdeploy SDK also provides other FFI (Foreign Function Inter
 
 ## Supported models
 
-|                                             Model                                             |         Task          | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
-| :-------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: |
-|            [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss)            |   Object Detection    |      Y      |    Y     |  N   |   N   |    Y     |
-|            [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos)            |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|        [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox)        |   Object Detection    |      Y      |    N     |  N   |   N   |    Y     |
-|            [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf)            |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|       [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet)       |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|             [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd)             |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|           [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet)           |   Object Detection    |      N      |    N     |  N   |   N   |    Y     |
-|           [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo)           |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|           [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox)           |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|   [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)    |   Object Detection    |      Y      |    Y     |  N   |   Y   |    Y     |
-|    [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)     |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)  |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|             [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl)             |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|       [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints)       |   Object Detection    |      N      |    Y     |  N   |   ?   |    Y     |
-|            [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr)            |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|       [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet)       |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|          [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet)          |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|      [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn)       | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|      [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin)      | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|            [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo)            | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
-|          [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2)          | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|                                                  Model                                                   |         Task          | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
+| :------------------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: |
+|                 [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss)                  |   Object Detection    |      Y      |    Y     |  N   |   N   |    Y     |
+|                 [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos)                  |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|             [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox)              |   Object Detection    |      Y      |    N     |  N   |   N   |    Y     |
+|                 [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf)                  |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|            [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet)             |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|                  [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd)                   |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|                [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet)                 |   Object Detection    |      N      |    N     |  N   |   N   |    Y     |
+|                [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo)                 |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|                [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox)                 |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|         [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)         |   Object Detection    |      Y      |    Y     |  N   |   Y   |    Y     |
+|          [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)          |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|       [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)       |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|                  [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl)                   |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|            [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints)             |   Object Detection    |      N      |    Y     |  N   |   ?   |    Y     |
+|                 [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr)                  |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|            [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet)             |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|               [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet)                |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|      [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)       | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|            [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn)            | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|           [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin)            | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|                 [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo)                  | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|               [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2)                | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|         [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn)         | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+|           [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer)           | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former)[\*](#mask2former) | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+
+## Reminder
+
+- For transformer-based models, we strongly suggest using `TensorRT>=8.4`.
+- <i id="mask2former">Mask2Former</i> should use `TensorRT>=8.6.1` for dynamic shape inference.
diff --git a/docs/zh_cn/04-supported-codebases/mmdet.md b/docs/zh_cn/04-supported-codebases/mmdet.md
index 4a42b38ca8..17c501630f 100644
--- a/docs/zh_cn/04-supported-codebases/mmdet.md
+++ b/docs/zh_cn/04-supported-codebases/mmdet.md
@@ -192,27 +192,35 @@ cv2.imwrite('output_detection.png', img)
 
 ## 模型支持列表
 
-|                                             Model                                             |         Task          | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
-| :-------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: |
-|            [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss)            |   Object Detection    |      Y      |    Y     |  N   |   N   |    Y     |
-|            [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos)            |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|        [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox)        |   Object Detection    |      Y      |    N     |  N   |   N   |    Y     |
-|            [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf)            |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|       [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet)       |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|             [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd)             |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|           [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet)           |   Object Detection    |      N      |    N     |  N   |   N   |    Y     |
-|           [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo)           |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|           [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox)           |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
-|   [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)    |   Object Detection    |      Y      |    Y     |  N   |   Y   |    Y     |
-|    [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)     |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)  |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
-|             [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl)             |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|       [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints)       |   Object Detection    |      N      |    Y     |  N   |   ?   |    Y     |
-|            [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr)            |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|       [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet)       |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-|          [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet)          |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
-| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|      [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn)       | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|      [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin)      | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
-|            [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo)            | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
-|          [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2)          | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|                                                  Model                                                   |         Task          | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
+| :------------------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: |
+|                 [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss)                  |   Object Detection    |      Y      |    Y     |  N   |   N   |    Y     |
+|                 [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos)                  |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|             [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox)              |   Object Detection    |      Y      |    N     |  N   |   N   |    Y     |
+|                 [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf)                  |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|            [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet)             |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|                  [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd)                   |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|                [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet)                 |   Object Detection    |      N      |    N     |  N   |   N   |    Y     |
+|                [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo)                 |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|                [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox)                 |   Object Detection    |      Y      |    Y     |  Y   |   N   |    Y     |
+|         [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)         |   Object Detection    |      Y      |    Y     |  N   |   Y   |    Y     |
+|          [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)          |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|       [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn)       |   Object Detection    |      Y      |    Y     |  Y   |   Y   |    Y     |
+|                  [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl)                   |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|            [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints)             |   Object Detection    |      N      |    Y     |  N   |   ?   |    Y     |
+|                 [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr)                  |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|            [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet)             |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|               [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet)                |   Object Detection    |      Y      |    Y     |  N   |   ?   |    Y     |
+|      [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn)       | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|            [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn)            | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|           [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin)            | Instance Segmentation |      Y      |    Y     |  N   |   N   |    Y     |
+|                 [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo)                  | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|               [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2)                | Instance Segmentation |      Y      |    N     |  N   |   N   |    Y     |
+|         [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn)         | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+|           [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer)           | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former)[\*](#mask2former) | Panoptic Segmentation |      Y      |    Y     |  N   |   N   |    N     |
+
+## 注意事项
+
+- 强烈建议使用`TensorRT>=8.4`来转换基于 `transformer` 的模型.
+- <i id="mask2former">Mask2Former</i> 请使用 `TensorRT>=8.6.1` 以保证动态尺寸正常推理.
diff --git a/mmdeploy/apis/onnx/export.py b/mmdeploy/apis/onnx/export.py
index 92a9002d8d..1b788e842d 100644
--- a/mmdeploy/apis/onnx/export.py
+++ b/mmdeploy/apis/onnx/export.py
@@ -127,7 +127,14 @@ def wrapper(*arg, **kwargs):
             patched_model.forward = wrap_forward(patched_model.forward)
             patched_model.forward = partial(patched_model.forward,
                                             **input_metas)
-
+        # force export on cpu; torch.onnx.export expects a Tensor or tuple
+        patched_model = patched_model.cpu()
+        if isinstance(args, torch.Tensor):
+            args = args.cpu()
+        elif isinstance(args, (tuple, list)):
+            args = tuple(_.cpu() for _ in args)
+        else:
+            raise RuntimeError(f'Not supported args: {args}')
         torch.onnx.export(
             patched_model,
             args,
diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
index 5fe9874686..e9f3b6cf7b 100644
--- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
+++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import copy
 import math
 from functools import partial
 from typing import Any, List, Optional, Sequence, Tuple, Union
@@ -97,8 +98,10 @@ def __clear_outputs(
 
         for i in range(batch_size):
             inds = test_outputs[0][i, :, 4] > 0.0
-            for output_id in range(num_outputs):
-                outputs[output_id][i] = test_outputs[output_id][i, inds, ...]
+            outputs[0][i] = test_outputs[0][i, inds, ...]
+            outputs[1][i] = test_outputs[1][i, inds, ...]
+            if num_outputs >= 3 and test_outputs[2][i] is not None:
+                outputs[2][i] = test_outputs[2][i, inds, ...]
         return outputs
 
     @staticmethod
@@ -171,42 +174,34 @@ def postprocessing_masks(det_bboxes: Union[np.ndarray, Tensor],
         result_masks = torch.cat(result_masks, 1)
         return result_masks.squeeze(0)
 
-    def forward(self,
-                inputs: torch.Tensor,
-                data_samples: Optional[List[BaseDataElement]] = None,
-                mode: str = 'predict',
-                **kwargs) -> Any:
-        """The model forward.
-
-        Args:
-            inputs (torch.Tensor): The input tensors
-            data_samples (List[BaseDataElement], optional): The data samples.
-                Defaults to None.
-            mode (str, optional): forward mode, only support `predict`.
-
-        Returns:
-            Any: Model output.
-        """
-        assert mode == 'predict', 'Deploy model only allow mode=="predict".'
-        inputs = inputs.contiguous()
-        outputs = self.predict(inputs)
-        outputs = End2EndModel.__clear_outputs(outputs)
+    def postprocessing_results(self,
+                               batch_dets: torch.Tensor,
+                               batch_labels: torch.Tensor,
+                               batch_masks: torch.Tensor,
+                               data_samples: List[BaseDataElement],
+                               rescale: bool = True):
+        """Post-processing dets, labels, masks."""
+        batch_size = len(batch_dets)
+        tmp_outputs = [batch_dets, batch_labels]
+        has_mask = batch_masks is not None
+        if has_mask:
+            tmp_outputs.append(batch_masks)
+        outputs = End2EndModel.__clear_outputs(tmp_outputs)
         batch_dets, batch_labels = outputs[:2]
-        batch_masks = outputs[2] if len(outputs) == 3 else None
-        batch_size = inputs.shape[0]
+        batch_masks = outputs[2] if has_mask else None
         img_metas = [data_sample.metainfo for data_sample in data_samples]
-        results = []
-        rescale = kwargs.get('rescale', True)
         model_type = self.model_cfg.model.type if \
             self.model_cfg is not None else None
         for i in range(batch_size):
             dets, labels = batch_dets[i], batch_labels[i]
-            result = InstanceData()
-
+            pred_instances = InstanceData()
+            device = dets.device
+            labels = labels.to(device)
             bboxes = dets[:, :4]
             scores = dets[:, 4]
-            # perform rescale
-            if rescale and 'scale_factor' in img_metas[i]:
+            scale_factor = bboxes.new_ones(1, 4)
+            # get scale_factor
+            if 'scale_factor' in img_metas[i]:
                 scale_factor = img_metas[i]['scale_factor']
                 if isinstance(scale_factor, (list, tuple, np.ndarray)):
                     if len(scale_factor) == 2:
@@ -215,6 +210,7 @@ def forward(self,
                             [scale_factor, scale_factor])
                     scale_factor = np.array(scale_factor)[None, :]  # [1,4]
                 scale_factor = torch.from_numpy(scale_factor).to(dets)
+            if rescale:
                 bboxes /= scale_factor
 
             # Most of models in mmdetection 3.x use `pad_param`, but some
@@ -228,19 +224,17 @@ def forward(self,
             elif 'border' in img_metas[i]:
                 pad_key = 'border'
             if pad_key is not None:
-                scale_factor = img_metas[i].get('scale_factor',
-                                                np.array([1., 1.]))
                 x_off = img_metas[i][pad_key][2] / scale_factor[1]
                 y_off = img_metas[i][pad_key][0] / scale_factor[0]
                 bboxes[:, ::2] -= x_off
                 bboxes[:, 1::2] -= y_off
                 bboxes *= (bboxes > 0)
 
-            result.scores = scores
-            result.bboxes = bboxes
+            pred_instances.scores = scores
+            pred_instances.bboxes = bboxes
             if model_type in ['SOLO', 'SOLOv2']:
-                result.bboxes = bboxes.new_zeros(bboxes.shape)
-            result.labels = labels
+                pred_instances.bboxes = bboxes.new_zeros(bboxes.shape)
+            pred_instances.labels = labels
 
             if batch_masks is not None:
                 masks = batch_masks[i]
@@ -248,7 +242,6 @@ def forward(self,
                 ori_h, ori_w = img_metas[i]['ori_shape'][:2]
                 export_postprocess_mask = False
                 if self.deploy_cfg is not None:
-
                     mmdet_deploy_cfg = get_post_processing_params(
                         self.deploy_cfg)
                     # this flag enable postprocess when export.
@@ -261,23 +254,45 @@ def forward(self,
                     masks = masks[:, :img_h, :img_w]
                 # avoid to resize masks with zero dim
                 if export_postprocess_mask and rescale and masks.shape[0] != 0:
-                    masks = torch.nn.functional.interpolate(
+                    masks = F.interpolate(
                         masks.unsqueeze(0),
                         size=[
-                            math.ceil(masks.shape[-2] /
-                                      img_metas[i]['scale_factor'][0]),
-                            math.ceil(masks.shape[-1] /
-                                      img_metas[i]['scale_factor'][1])
+                            math.ceil(masks.shape[-2] / scale_factor[0]),
+                            math.ceil(masks.shape[-1] / scale_factor[1])
                         ])[..., :ori_h, :ori_w]
                     masks = masks.squeeze(0)
                 if masks.dtype != bool:
                     masks = masks >= 0.5
                 # aligned with mmdet to easily convert to numpy
-                masks = masks.cpu()
-                result.masks = masks
-            data_samples[i].pred_instances = result
-            results.append(data_samples[i])
-        return results
+                masks = masks.to(device)
+                pred_instances.masks = masks
+
+            data_samples[i].pred_instances = pred_instances
+
+    def forward(self,
+                inputs: torch.Tensor,
+                data_samples: List[BaseDataElement],
+                mode: str = 'predict',
+                **kwargs) -> Any:
+        """The model forward.
+
+        Args:
+            inputs (torch.Tensor): The input tensors
+            data_samples (List[BaseDataElement]): The data samples.
+                Updated in place with ``pred_instances``.
+            mode (str, optional): forward mode, only support `predict`.
+
+        Returns:
+            Any: Model output.
+        """
+        assert mode == 'predict', 'Deploy model only allow mode=="predict".'
+        inputs = inputs.contiguous()
+        outputs = self.predict(inputs)
+        batch_dets, batch_labels = outputs[:2]
+        batch_masks = outputs[2] if len(outputs) >= 3 else None
+        self.postprocessing_results(batch_dets, batch_labels, batch_masks,
+                                    data_samples)
+        return data_samples
 
     def predict(self, imgs: Tensor) -> Tuple[np.ndarray, np.ndarray]:
         """The interface for predict.
@@ -294,6 +309,155 @@ def predict(self, imgs: Tensor) -> Tuple[np.ndarray, np.ndarray]:
         return outputs
 
 
+@__BACKEND_MODEL.register_module('panoptic_end2end')
+class PanOpticEnd2EndModel(End2EndModel):
+    """End to end model for inference of panoptic segmentation.
+
+    Args:
+        backend (Backend): The backend enum, specifying backend type.
+        backend_files (Sequence[str]): Paths to all required backend files
+                (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn).
+        device (str): A string specifying device type.
+        deploy_cfg (str|Config): Deployment config file or loaded Config
+            object.
+        data_preprocessor (dict|nn.Module): The data preprocessor.
+    """
+
+    def __init__(self,
+                 backend: Backend,
+                 backend_files: Sequence[str],
+                 device: str,
+                 deploy_cfg: Union[str, Config],
+                 model_cfg: Optional[Union[str, Config]] = None,
+                 data_preprocessor: Optional[Union[dict, nn.Module]] = None,
+                 **kwargs):
+        super(PanOpticEnd2EndModel, self).__init__(
+            backend,
+            backend_files,
+            device,
+            deploy_cfg,
+            model_cfg=model_cfg,
+            data_preprocessor=data_preprocessor,
+            **kwargs)
+        from mmdet.models.seg_heads import (HeuristicFusionHead,
+                                            MaskFormerFusionHead)
+        obj_dict = {
+            'HeuristicFusionHead': HeuristicFusionHead,
+            'MaskFormerFusionHead': MaskFormerFusionHead
+        }
+        head_args = self.model_cfg.model.panoptic_fusion_head.copy()
+        test_cfg = self.model_cfg.model.test_cfg
+        # deal with PanopticFPN
+        if 'panoptic' in test_cfg:
+            test_cfg = test_cfg['panoptic']
+        head_args['test_cfg'] = test_cfg
+        self.fusion_head_type = head_args.pop('type')
+        self.fusion_head = obj_dict[self.fusion_head_type](**head_args)
+
+    def forward(self,
+                inputs: torch.Tensor,
+                data_samples: List[BaseDataElement],
+                mode: str = 'predict',
+                **kwargs) -> Any:
+        """The model forward.
+
+        Args:
+            inputs (torch.Tensor): The input tensors
+            data_samples (List[BaseDataElement]): The data samples.
+                Required; predictions are attached to them in place.
+            mode (str, optional): forward mode, only support `predict`.
+
+        Returns:
+            Any: Model output.
+        """
+        assert mode == 'predict', 'Deploy model only allow mode=="predict".'
+        model_type = self.model_cfg.model.type
+
+        inputs = inputs.contiguous()
+        outputs = self.predict(inputs)
+        rescale = kwargs.get('rescale', True)
+
+        if model_type == 'PanopticFPN':
+            batch_dets, batch_labels, batch_masks = outputs[:3]
+            # fix int32 and int64 mismatch in fusion head
+            batch_labels = batch_labels.to(torch.long)
+            batch_semseg = outputs[3]
+            tmp_data_samples = copy.deepcopy(data_samples)
+            self.postprocessing_results(batch_dets, batch_labels, batch_masks,
+                                        tmp_data_samples)
+            masks_results = [ds.pred_instances for ds in tmp_data_samples]
+            img_metas = [data_sample.metainfo for data_sample in data_samples]
+            seg_pred_list = []
+            for i in range(len(data_samples)):
+                h, w = img_metas[i]['img_shape']
+                seg_pred = batch_semseg[i][:, :h, :w]
+                h, w = img_metas[i]['ori_shape']
+                seg_pred = F.interpolate(
+                    seg_pred[None],
+                    size=(h, w),
+                    mode='bilinear',
+                    align_corners=False)[0]
+                seg_pred_list.append(seg_pred)
+            semseg_results = self.fusion_head.predict(masks_results,
+                                                      seg_pred_list)
+            results_list = [dict(pan_results=res) for res in semseg_results]
+        elif model_type in ['MaskFormer', 'Mask2Former']:
+            batch_cls_logits = outputs[0]
+            batch_mask_logits = outputs[1]
+
+            results_list = self.fusion_head.predict(
+                batch_cls_logits,
+                batch_mask_logits,
+                data_samples,
+                rescale=rescale)
+
+        data_samples = self.add_pred_to_datasample(data_samples, results_list)
+        return data_samples
+
+    @staticmethod
+    def add_pred_to_datasample(
+            data_samples: List[BaseDataElement],
+            results_list: List[dict]) -> List[BaseDataElement]:
+        """Add predictions to `DetDataSample`.
+
+        Args:
+            data_samples (list[:obj:`DetDataSample`]): A batch of
+                data samples that contain annotations and predictions.
+            results_list (List[dict]): Instance segmentation, semantic
+                segmentation and panoptic segmentation results.
+
+        Returns:
+            list[:obj:`DetDataSample`]: Detection results of the
+            input images. Each DetDataSample usually contain
+            'pred_instances' and `pred_panoptic_seg`. And the
+            ``pred_instances`` usually contains following keys.
+
+                - scores (Tensor): Classification scores, has a shape
+                    (num_instance, )
+                - labels (Tensor): Labels of bboxes, has a shape
+                    (num_instances, ).
+                - bboxes (Tensor): Has a shape (num_instances, 4),
+                    the last dimension 4 arrange as (x1, y1, x2, y2).
+                - masks (Tensor): Has a shape (num_instances, H, W).
+
+            And the ``pred_panoptic_seg`` contains the following key
+
+                - sem_seg (Tensor): panoptic segmentation mask, has a
+                    shape (1, h, w).
+        """
+        for data_sample, pred_results in zip(data_samples, results_list):
+            if 'pan_results' in pred_results:
+                data_sample.pred_panoptic_seg = pred_results['pan_results']
+
+            if 'ins_results' in pred_results:
+                data_sample.pred_instances = pred_results['ins_results']
+
+            assert 'sem_results' not in pred_results, 'segmantic ' \
+                'segmentation results are not supported yet.'
+
+        return data_samples
+
+
 @__BACKEND_MODEL.register_module('single_stage')
 class PartitionSingleStageModel(End2EndModel):
     """Partitioned single stage detection model.
diff --git a/mmdeploy/codebase/mmdet/models/__init__.py b/mmdeploy/codebase/mmdet/models/__init__.py
index ee7b60a4e2..8da5d86571 100644
--- a/mmdeploy/codebase/mmdet/models/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/__init__.py
@@ -5,6 +5,7 @@
 from . import layers  # noqa: F401,F403
 from . import necks  # noqa: F401,F403
 from . import roi_heads  # noqa: F401,F403
+from . import seg_heads  # noqa: F401,F403
 from . import task_modules  # noqa: F401,F403
 from . import transformer  # noqa: F401,F403
 from . import utils  # noqa: F401,F403
diff --git a/mmdeploy/codebase/mmdet/models/detectors/__init__.py b/mmdeploy/codebase/mmdet/models/detectors/__init__.py
index 2c0a2f3ed5..460694aa72 100644
--- a/mmdeploy/codebase/mmdet/models/detectors/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/detectors/__init__.py
@@ -1,6 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from . import base_detr, single_stage, single_stage_instance_seg, two_stage
+from . import (base_detr, maskformer, panoptic_two_stage_segmentor,
+               single_stage, single_stage_instance_seg, two_stage)
 
 __all__ = [
-    'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage'
+    'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage',
+    'panoptic_two_stage_segmentor', 'maskformer'
 ]
diff --git a/mmdeploy/codebase/mmdet/models/detectors/maskformer.py b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py
new file mode 100644
index 0000000000..c430ae9126
--- /dev/null
+++ b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py
@@ -0,0 +1,54 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils import is_dynamic_shape
+
+
+@FUNCTION_REWRITER.register_rewriter('mmdet.models.detectors.maskformer.'
+                                     'MaskFormer.forward')
+def maskformer__forward(self,
+                        batch_inputs,
+                        data_samples,
+                        mode='tensor',
+                        **kwargs):
+    """Rewrite `forward` for default backend. Support configured dynamic/static
+    shape for model input and return detection result as Tensor instead of
+    numpy array.
+
+    Args:
+        batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+        data_samples (List[:obj:`DetDataSample`]): The Data
+            Samples. It usually includes information such as
+            `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
+        mode (str): Forward mode. Unused by this rewriter.
+            Defaults to 'tensor'.
+
+    Returns:
+        tuple[Tensor, Tensor]:
+            (mask_cls_results, mask_pred_results), the class and mask
+            predictions of `panoptic_head.predict`. The panoptic fusion
+            head is not exported and has to be applied to them afterwards.
+    """
+    ctx = FUNCTION_REWRITER.get_context()
+    deploy_cfg = ctx.cfg
+
+    # get origin input shape as tensor to support onnx dynamic shape
+    is_dynamic_flag = is_dynamic_shape(deploy_cfg)
+    img_shape = torch._shape_as_tensor(batch_inputs)[2:]
+    if not is_dynamic_flag:
+        img_shape = [int(val) for val in img_shape]
+    # set the metainfo
+    # note that we can not use `set_metainfo`, deepcopy would crash the
+    # onnx trace.
+    for data_sample in data_samples:
+        data_sample.set_field(
+            name='img_shape', value=img_shape, field_type='metainfo')
+        data_sample.set_field(
+            name='batch_input_shape', value=img_shape, field_type='metainfo')
+
+    feats = self.extract_feat(batch_inputs)
+    mask_cls_results, mask_pred_results = self.panoptic_head.predict(
+        feats, data_samples)
+    # do not export panoptic_fusion_head
+    return mask_cls_results, mask_pred_results
diff --git a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py
new file mode 100644
index 0000000000..8fa4475723
--- /dev/null
+++ b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py
@@ -0,0 +1,65 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import torch
+
+from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils import is_dynamic_shape
+
+
+@FUNCTION_REWRITER.register_rewriter(
+    'mmdet.models.detectors.panoptic_two_stage_segmentor.'
+    'TwoStagePanopticSegmentor.forward')
+def two_stage_panoptic_segmentor__forward(self,
+                                          batch_inputs,
+                                          data_samples,
+                                          mode='tensor',
+                                          **kwargs):
+    """Rewrite `forward` for default backend. Support configured dynamic/static
+    shape for model input and return detection result as Tensor instead of
+    numpy array.
+
+    Args:
+        batch_inputs (Tensor): Inputs with shape (N, C, H, W).
+        data_samples (List[:obj:`DetDataSample`]): The Data
+            Samples. It usually includes information such as
+            `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
+        mode (str): Forward mode. Unused by this rewriter.
+            Defaults to 'tensor'.
+
+    Returns:
+        tuple[Tensor, Tensor, Tensor, Tensor]:
+            (bboxes, labels, masks, semseg), `bboxes` of shape [N, num_det, 5],
+            `labels` of shape [N, num_det], `masks` of shape [N, roi_H, roi_W],
+            `semseg` of shape [N, num_sem_class, sem_H, sem_W].
+    """
+    ctx = FUNCTION_REWRITER.get_context()
+    deploy_cfg = ctx.cfg
+
+    # get origin input shape as tensor to support onnx dynamic shape
+    is_dynamic_flag = is_dynamic_shape(deploy_cfg)
+    img_shape = torch._shape_as_tensor(batch_inputs)[2:].to(
+        batch_inputs.device)
+    if not is_dynamic_flag:
+        img_shape = [int(val) for val in img_shape]
+    # set the metainfo
+    # note that we can not use `set_metainfo`, deepcopy would crash the
+    # onnx trace.
+    for data_sample in data_samples:
+        data_sample.set_field(
+            name='img_shape', value=img_shape, field_type='metainfo')
+        data_sample.set_field(
+            name='batch_input_shape', value=img_shape, field_type='metainfo')
+
+    img_metas = [data_samples.metainfo for data_samples in data_samples]
+    x = self.extract_feat(batch_inputs)
+    if data_samples[0].get('proposals', None) is None:
+        proposals = self.rpn_head.predict(x, data_samples, rescale=False)
+    else:
+        proposals = [data_sample.proposals for data_sample in data_samples]
+
+    bboxes, labels, masks = self.roi_head.predict(
+        x, proposals, data_samples, rescale=False)
+
+    semseg = self.semantic_head.predict(x, img_metas, rescale=False)
+    # do not export panoptic_fusion_head
+    return bboxes, labels, masks, semseg
diff --git a/mmdeploy/codebase/mmdet/models/detectors/single_stage.py b/mmdeploy/codebase/mmdet/models/detectors/single_stage.py
index 5f3872c8b6..b25f70c028 100644
--- a/mmdeploy/codebase/mmdet/models/detectors/single_stage.py
+++ b/mmdeploy/codebase/mmdet/models/detectors/single_stage.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import copy
 
 import torch
 from mmdet.models.detectors.base import ForwardResults
@@ -31,9 +30,6 @@ def _set_metainfo(data_samples, img_shape):
 
     Code in this function cannot be traced by fx.
     """
-
-    # fx can not trace deepcopy correctly
-    data_samples = copy.deepcopy(data_samples)
     if data_samples is None:
         data_samples = [DetDataSample()]
 
diff --git a/mmdeploy/codebase/mmdet/models/detectors/two_stage.py b/mmdeploy/codebase/mmdet/models/detectors/two_stage.py
index d0bd140003..477571327e 100644
--- a/mmdeploy/codebase/mmdet/models/detectors/two_stage.py
+++ b/mmdeploy/codebase/mmdet/models/detectors/two_stage.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import copy
 
 import torch
 from mmdet.models.detectors.base import ForwardResults
@@ -63,7 +62,6 @@ def two_stage_detector__forward(self,
                 (num_instances, ).
     """
     ctx = FUNCTION_REWRITER.get_context()
-    data_samples = copy.deepcopy(data_samples)
     deploy_cfg = ctx.cfg
 
     # get origin input shape as tensor to support onnx dynamic shape
diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py
new file mode 100644
index 0000000000..db03f1f20c
--- /dev/null
+++ b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from . import base_semantic_head
+
+__all__ = ['base_semantic_head']
diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py
new file mode 100644
index 0000000000..8ac2982812
--- /dev/null
+++ b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import torch.nn.functional as F
+
+from mmdeploy.core import FUNCTION_REWRITER
+
+
+@FUNCTION_REWRITER.register_rewriter(
+    'mmdet.models.seg_heads.base_semantic_head.BaseSemanticHead.predict')
+def base_semantic_head__predict(self, x, batch_img_metas, rescale=False):
+    """Rewrite `predict` for default backend. Support configured dynamic/static
+    shape for model input and return semantic-segmentation result as Tensor
+    instead of numpy array.
+
+    Args:
+        x (Union[Tensor, Tuple[Tensor]]): Feature maps.
+        batch_img_metas (List[dict]): List of image information.
+        rescale (bool): Whether to rescale the results. Unused by this
+            rewriter. Defaults to False.
+
+    Returns:
+        Tensor: `semseg` of shape [N, num_sem_class, H, W]
+    """
+    seg_preds = self.forward(x)['seg_preds']
+    img_shape = batch_img_metas[0]['batch_input_shape']
+    seg_preds = F.interpolate(
+        seg_preds,
+        size=(img_shape[0], img_shape[1]),
+        mode='bilinear',
+        align_corners=False)
+    return seg_preds
diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py
index 7a5ccccba8..8649294f55 100644
--- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py
+++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+
 from mmdeploy.core import SYMBOLIC_REWRITER
 
 
diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml
index 7e35e8a1b6..ed16bff92f 100644
--- a/tests/regression/mmdet.yml
+++ b/tests/regression/mmdet.yml
@@ -15,8 +15,8 @@ globals:
       tolerance: 1 # metric ±n%
       multi_value: 100
     PQ:
-      metric_key: '?'
-      tolerance: 0.1 # metric ±n%
+      metric_key: 'coco_panoptic/PQ'
+      tolerance: 0.5 # metric ±n%
   convert_image: &convert_image
     input_img: *input_img
     test_img: *test_img
@@ -381,3 +381,39 @@ models:
       - *pipeline_seg_ort_dynamic_fp32
       - *pipeline_seg_trt_dynamic_fp32
       - *pipeline_seg_openvino_dynamic_fp32
+
+  - name: PanopticFPN
+    metafile: configs/panoptic_fpn/metafile.yml
+    model_configs:
+      - configs/panoptic_fpn/panoptic-fpn_r50_fpn_1x_coco.py
+    pipelines:
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py
+        convert_image: *convert_image
+        backend_test: False
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py
+        convert_image: *convert_image
+        backend_test: True
+
+  - name: MaskFormer
+    metafile: configs/maskformer/metafile.yml
+    model_configs:
+      - configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py
+    pipelines:
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py
+        convert_image: *convert_image
+        backend_test: False
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py
+        convert_image: *convert_image
+        backend_test: True
+
+  - name: Mask2Former
+    metafile: configs/mask2former/metafile.yml
+    model_configs:
+      - configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py
+    pipelines:
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py
+        convert_image: *convert_image
+        backend_test: False
+      - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py
+        convert_image: *convert_image
+        backend_test: False