diff --git a/README.md b/README.md
index 7d0d1848..200e178f 100644
--- a/README.md
+++ b/README.md
@@ -51,13 +51,13 @@ pip install -r requirements.txt
 Single GPU
 
 ```shell
-python tools/train.py --batch 8 --conf configs/yolov6s_finetune.py --data data/coco.yaml --device 0
+python tools/train.py --batch 8 --conf configs/yolov6s_seg_finetune.py --data data/coco.yaml --device 0
 ```
 
 Multi GPUs (DDP mode recommended)
 
 ```shell
-python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 64 --conf configs/yolov6s_finetune.py --data data/coco.yaml --device 0,1,2,3,4,5,6,7
+python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 64 --conf configs/yolov6s_seg_finetune.py --data data/coco.yaml --device 0,1,2,3,4,5,6,7
 ```
 - fuse_ab: Not supported in current version
 - conf: select config file to specify network/optimizer/hyperparameters. We recommend to apply yolov6n/s/m/l_finetune.py when training on your custom dataset.
@@ -88,7 +88,7 @@ python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 64
 Reproduce mAP on COCO val2017 dataset with 640×640 resolution
 
 ```shell
-python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6s.pt --task val
+python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6s_seg.pt --task val
 ```
 
 </details>
@@ -102,11 +102,11 @@ First, download a pretrained model from the YOLOv6 [release](https://github.com/
 Second, run inference with `tools/infer.py`
 
 ```shell
-python tools/infer.py --weights yolov6s.pt --source img.jpg / imgdir / video.mp4
+python tools/infer.py --weights yolov6s_seg.pt --source img.jpg / imgdir / video.mp4
 ```
 If you want to inference on local camera or  web camera, you can run:
 ```shell
-python tools/infer.py --weights yolov6s.pt --webcam --webcam-addr 0
+python tools/infer.py --weights yolov6s_seg.pt --webcam --webcam-addr 0
 ```
 `webcam-addr` can be local camera number id or rtsp address.
 Maybe you want to eval a solo-head model, remember to add the *--issolo* parameter.
diff --git a/Train_custom_data.md b/Train_custom_data.md
new file mode 100644
index 00000000..2a281d0b
--- /dev/null
+++ b/Train_custom_data.md
@@ -0,0 +1,164 @@
+# Train Custom Data
+
+This guide explains how to train on your own custom data with YOLOv6 (taking fine-tuning of the YOLOv6-s model as an example).
+
+## 0. Before you start
+
+Clone this repo and follow README.md to install requirements in a Python3.8 environment.
+```shell
+$ git clone https://github.com/meituan/YOLOv6.git
+```
+
+## 1. Prepare your own dataset
+
+**Step 1**: Prepare your own dataset with images. For labeling images, you can use tools like [Labelme](https://github.com/wkentaro/labelme) or [Roboflow](https://roboflow.com/).
+
+**Step 2**: Generate label files in YOLO format.
+
+Each image corresponds to one label file. An example of the label format is shown below.
+
+```text
+# class_id RLE&polygon(n * (x_coord, y_coord))
+0 0.0503437 0.0314644 0.0688125 0.114603 0.0604219 0.247197 0.0654531 0.330335 0.0436406 0.359561 0.0201406 0.361799 0.00335937 0.294372 0.00671875 0.229205 0.0134219 0.112364 0.0251719 0.0359623 0.0268594 0.00449791
+1 0.663672 0.412218 0.603688 0.425167 0.528234 0.42 0.475984 0.399268 0.417938 0.391485 0.348281 0.383724 0.301844 0.414812 0.317328 0.430356 0.3715 0.469205 0.458578 0.508075 0.510813 0.515837 0.574672 0.495126 0.628844 0.476987 0.657875 0.453661 0.690766 0.435544
+```
+
+
+- Each row represents one object.
+- Class id starts from `0`.
+- `RLE&polygon` stands for n points, so it should contain 2n numbers: n pairs of (x_coord, y_coord).
+
+**Step 3**: Organize directories.
+
+Organize your directory of custom dataset as follows:
+
+```shell
+custom_dataset
+├── images
+│   ├── train
+│   │   ├── train0.jpg
+│   │   └── train1.jpg
+│   ├── val
+│   │   ├── val0.jpg
+│   │   └── val1.jpg
+│   └── test
+│       ├── test0.jpg
+│       └── test1.jpg
+└── labels
+    ├── train
+    │   ├── train0.txt
+    │   └── train1.txt
+    ├── val
+    │   ├── val0.txt
+    │   └── val1.txt
+    └── test
+        ├── test0.txt
+        └── test1.txt
+```
+
+**Step 4**: Create `dataset.yaml` in `$YOLOv6_DIR/data`.
+
+```yaml
+# Please ensure that your custom_dataset is placed in the same parent directory as YOLOv6_DIR
+train: ../custom_dataset/images/train # train images
+val: ../custom_dataset/images/val # val images
+test: ../custom_dataset/images/test # test images (optional)
+
+# Whether this is the COCO dataset; set to True only for the COCO dataset.
+is_coco: False
+
+# Classes
+nc: 20  # number of classes
+names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']  # class names
+```
+
+## 2. Create a config file
+
+We use a config file to specify the network structure and training settings, including optimizer and data augmentation hyperparameters.
+
+If you create a new config file, please put it under the `configs` directory.
+Or just use the provided config file in `$YOLOv6_DIR/configs/*_finetune.py`. Download the pretrained model you want to use from [here](https://github.com/meituan/YOLOv6#benchmark).
+
+```python
+## YOLOv6s Model config file
+model = dict(
+    type='YOLOv6s',
+    pretrained='./weights/yolov6s.pt', # download the pretrained model from YOLOv6 github if you're going to use the pretrained model
+    depth_multiple = 0.33,
+    width_multiple = 0.50,
+    ...
+)
+solver=dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    ...
+)
+
+data_aug = dict(
+    hsv_h=0.015,
+    hsv_s=0.7,
+    hsv_v=0.4,
+    ...
+)
+```
+
+One more thing: seg models take 4 additional parameters in the head config:
+- **isseg**: whether the model is a seg model.
+- **issolo**: whether the head is a solo head (`True`) or a yolact head (`False`).
+- **npr** and **nm**: how many features the net should extract and use to get the mask.
+
+
+
+## 3. Train
+
+Single GPU
+
+```shell
+# Be sure to open use_dfl mode in config file (use_dfl=True, reg_max=16) if you want to do self-distillation training further.
+python tools/train.py --batch 32 --conf configs/yolov6s_seg_finetune.py --data data/dataset.yaml --device 0
+```
+
+Multi GPUs (DDP mode recommended)
+
+```shell
+# Be sure to open use_dfl mode in config file (use_dfl=True, reg_max=16) if you want to do self-distillation training further.
+python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 256 --conf configs/yolov6s_seg_finetune.py --data data/dataset.yaml --device 0,1,2,3,4,5,6,7
+```
+
+Self-distillation training
+
+```shell
+# Be sure to open use_dfl mode in config file (use_dfl=True, reg_max=16).
+python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 256 --conf configs/yolov6s_seg_finetune.py --data data/dataset.yaml --distill --teacher_model_path your_model_path --device 0,1,2,3,4,5,6,7
+```
+
+
+## 4. Evaluation
+
+```shell
+python tools/eval.py --data data/dataset.yaml --weights output_dir/name/weights/best_ckpt.pt --task val --device 0
+```
+
+
+
+## 5. Inference
+
+```shell
+python tools/infer.py --weights output_dir/name/weights/best_ckpt.pt --source img.jpg --device 0
+```
+
+
+
+## 6. Deployment
+
+Export as [ONNX](https://github.com/meituan/YOLOv6/tree/main/deploy/ONNX) Format
+
+```shell
+# Without NMS OP, pure model.
+python deploy/ONNX/export_onnx.py --weights output_dir/name/weights/best_ckpt.pt --simplify --device 0
+# If you want to run with ONNX-Runtime (NMS integrated).
+python deploy/ONNX/export_onnx.py --weights output_dir/name/weights/best_ckpt.pt --simplify --device 0 --dynamic-batch --end2end --ort
+# If you want to run with TensorRT (NMS integrated).
+python deploy/ONNX/export_onnx.py --weights output_dir/name/weights/best_ckpt.pt --simplify --device 0 --dynamic-batch --end2end
+```
diff --git a/assets/speed_comparision_seg.png b/assets/speed_comparision_seg.png
new file mode 100644
index 00000000..0a17d4fb
Binary files /dev/null and b/assets/speed_comparision_seg.png differ
diff --git a/configs/solo/yolov6l_solo.py b/configs/exp/yolov6l_solo.py
similarity index 100%
rename from configs/solo/yolov6l_solo.py
rename to configs/exp/yolov6l_solo.py
diff --git a/configs/solo/yolov6m_solo.py b/configs/exp/yolov6m_solo.py
similarity index 100%
rename from configs/solo/yolov6m_solo.py
rename to configs/exp/yolov6m_solo.py
diff --git a/configs/solo/yolov6n_solo.py b/configs/exp/yolov6n_solo.py
similarity index 100%
rename from configs/solo/yolov6n_solo.py
rename to configs/exp/yolov6n_solo.py
diff --git a/configs/solo/yolov6s_solo.py b/configs/exp/yolov6s_solo.py
similarity index 100%
rename from configs/solo/yolov6s_solo.py
rename to configs/exp/yolov6s_solo.py
diff --git a/configs/solo/yolov6x_solo.py b/configs/exp/yolov6x_solo.py
similarity index 100%
rename from configs/solo/yolov6x_solo.py
rename to configs/exp/yolov6x_solo.py
diff --git a/configs/yolov6l_seg_finetune.py b/configs/yolov6l_seg_finetune.py
new file mode 100644
index 00000000..8121af90
--- /dev/null
+++ b/configs/yolov6l_seg_finetune.py
@@ -0,0 +1,72 @@
+# YOLOv6l-seg model
+model = dict(
+    type='YOLOv6l',
+    pretrained='weights/yolov6l_seg.pt',
+    depth_multiple=1.0,
+    width_multiple=1.0,
+    backbone=dict(
+        type='CSPBepBackbone',
+        num_repeats=[1, 6, 12, 18, 6],
+        out_channels=[64, 128, 256, 512, 1024],
+        csp_e=float(1)/2,
+        fuse_P2=True,
+        ),
+    neck=dict(
+        type='CSPRepBiFPANNeck',
+        num_repeats=[12, 12, 12, 12],
+        out_channels=[256, 128, 128, 256, 256, 512],
+        csp_e=float(1)/2,
+        ),
+    head=dict(
+        type='EffiDeHead',
+        in_channels=[128, 256, 512],
+        num_layers=3,
+        begin_indices=24,
+        npr=256,
+        nm=32,
+        isseg=True,
+        issolo=False,
+        anchors=3,
+        anchors_init=[[10,13, 19,19, 33,23],
+                      [30,61, 59,59, 59,119],
+                      [116,90, 185,185, 373,326]],
+        out_indices=[17, 20, 23],
+        strides=[8, 16, 32],
+        atss_warmup_epoch=0,
+        iou_type='giou',
+        use_dfl=True,
+        reg_max=16, #if use_dfl is False, please set reg_max to 0
+        distill_weight={
+            'class': 2.0,
+            'dfl': 1.0,
+        },
+    )
+)
+
+solver = dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    lr0=0.0032,
+    lrf=0.12,
+    momentum=0.843,
+    weight_decay=0.00036,
+    warmup_epochs=2.0,
+    warmup_momentum=0.5,
+    warmup_bias_lr=0.05
+)
+
+data_aug = dict(
+    hsv_h=0.0138,
+    hsv_s=0.664,
+    hsv_v=0.464,
+    degrees=0.373,
+    translate=0.245,
+    scale=0.898,
+    shear=0.602,
+    flipud=0.00856,
+    fliplr=0.5,
+    mosaic=1.0,
+    mixup=0.243,
+)
+training_mode = "conv_silu"
+# use normal conv to speed up training and further improve accuracy.
diff --git a/configs/yolov6m_seg_finetune.py b/configs/yolov6m_seg_finetune.py
new file mode 100644
index 00000000..ee934ec0
--- /dev/null
+++ b/configs/yolov6m_seg_finetune.py
@@ -0,0 +1,70 @@
+# YOLOv6m-seg model
+model = dict(
+    type='YOLOv6m',
+    pretrained='weights/yolov6m_seg.pt',
+    depth_multiple=0.60,
+    width_multiple=0.75,
+    backbone=dict(
+        type='CSPBepBackbone',
+        num_repeats=[1, 6, 12, 18, 6],
+        out_channels=[64, 128, 256, 512, 1024],
+        csp_e=float(2)/3,
+        fuse_P2=True,
+        ),
+    neck=dict(
+        type='CSPRepBiFPANNeck',
+        num_repeats=[12, 12, 12, 12],
+        out_channels=[256, 128, 128, 256, 256, 512],
+        csp_e=float(2)/3,
+        ),
+    head=dict(
+        type='EffiDeHead',
+        in_channels=[128, 256, 512],
+        num_layers=3,
+        begin_indices=24,
+        npr=256,
+        nm=32,
+        isseg=True,
+        issolo=False,
+        anchors=3,
+        anchors_init=[[10,13, 19,19, 33,23],
+                      [30,61, 59,59, 59,119],
+                      [116,90, 185,185, 373,326]],
+        out_indices=[17, 20, 23],
+        strides=[8, 16, 32],
+        atss_warmup_epoch=0,
+        iou_type='giou',
+        use_dfl=True,
+        reg_max=16, #if use_dfl is False, please set reg_max to 0
+        distill_weight={
+            'class': 0.8,
+            'dfl': 1.0,
+        },
+    )
+)
+
+solver = dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    lr0=0.0032,
+    lrf=0.12,
+    momentum=0.843,
+    weight_decay=0.00036,
+    warmup_epochs=2.0,
+    warmup_momentum=0.5,
+    warmup_bias_lr=0.05
+)
+
+data_aug = dict(
+    hsv_h=0.0138,
+    hsv_s=0.664,
+    hsv_v=0.464,
+    degrees=0.373,
+    translate=0.245,
+    scale=0.898,
+    shear=0.602,
+    flipud=0.00856,
+    fliplr=0.5,
+    mosaic=1.0,
+    mixup=0.243,
+)
diff --git a/configs/yolov6n_seg_finetune.py b/configs/yolov6n_seg_finetune.py
new file mode 100644
index 00000000..0ab53501
--- /dev/null
+++ b/configs/yolov6n_seg_finetune.py
@@ -0,0 +1,69 @@
+# YOLOv6n-seg model
+model = dict(
+    type='YOLOv6n',
+    pretrained='weights/yolov6n_seg.pt',
+    depth_multiple=0.33,
+    width_multiple=0.25,
+    backbone=dict(
+        type='EfficientRep',
+        num_repeats=[1, 6, 12, 18, 6],
+        out_channels=[64, 128, 256, 512, 1024],
+        fuse_P2=True,
+        cspsppf=True,
+        ),
+    neck=dict(
+        type='RepBiFPANNeck',
+        num_repeats=[12, 12, 12, 12],
+        out_channels=[256, 128, 128, 256, 256, 512],
+        ),
+    head=dict(
+        type='EffiDeHead',
+        in_channels=[128, 256, 512],
+        num_layers=3,
+        begin_indices=24,
+        npr=256,
+        nm=32,
+        isseg=True,
+        issolo=False,
+        anchors=3,
+        anchors_init=[[10,13, 19,19, 33,23],
+                      [30,61, 59,59, 59,119],
+                      [116,90, 185,185, 373,326]],
+        out_indices=[17, 20, 23],
+        strides=[8, 16, 32],
+        atss_warmup_epoch=0,
+        iou_type='siou',
+        use_dfl=False, # set to True if you want to further train with distillation
+        reg_max=0, # set to 16 if you want to further train with distillation
+        distill_weight={
+            'class': 1.0,
+            'dfl': 1.0,
+        },
+    )
+)
+
+solver = dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    lr0=0.0032,
+    lrf=0.12,
+    momentum=0.843,
+    weight_decay=0.00036,
+    warmup_epochs=2.0,
+    warmup_momentum=0.5,
+    warmup_bias_lr=0.05
+)
+
+data_aug = dict(
+    hsv_h=0.0138,
+    hsv_s=0.664,
+    hsv_v=0.464,
+    degrees=0.373,
+    translate=0.245,
+    scale=0.898,
+    shear=0.602,
+    flipud=0.00856,
+    fliplr=0.5,
+    mosaic=1.0,
+    mixup=0.243,
+)
\ No newline at end of file
diff --git a/configs/yolov6s_seg_finetune.py b/configs/yolov6s_seg_finetune.py
new file mode 100644
index 00000000..a39ac713
--- /dev/null
+++ b/configs/yolov6s_seg_finetune.py
@@ -0,0 +1,69 @@
+# YOLOv6s-seg model
+model = dict(
+    type='YOLOv6s',
+    pretrained='weights/yolov6s_seg.pt',
+    depth_multiple=0.33,
+    width_multiple=0.50,
+    backbone=dict(
+        type='EfficientRep',
+        num_repeats=[1, 6, 12, 18, 6],
+        out_channels=[64, 128, 256, 512, 1024],
+        fuse_P2=True,
+        cspsppf=True,
+        ),
+    neck=dict(
+        type='RepBiFPANNeck',
+        num_repeats=[12, 12, 12, 12],
+        out_channels=[256, 128, 128, 256, 256, 512],
+        ),
+    head=dict(
+        type='EffiDeHead',
+        in_channels=[128, 256, 512],
+        num_layers=3,
+        begin_indices=24,
+        npr=256,
+        nm=32,
+        isseg=True,
+        issolo=False,
+        anchors=3,
+        anchors_init=[[10,13, 19,19, 33,23],
+                      [30,61, 59,59, 59,119],
+                      [116,90, 185,185, 373,326]],
+        out_indices=[17, 20, 23],
+        strides=[8, 16, 32],
+        atss_warmup_epoch=0,
+        iou_type='giou',
+        use_dfl=False, # set to True if you want to further train with distillation
+        reg_max=0, # set to 16 if you want to further train with distillation
+        distill_weight={
+            'class': 1.0,
+            'dfl': 1.0,
+        },
+    )
+)
+
+solver = dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    lr0=0.0032,
+    lrf=0.12,
+    momentum=0.843,
+    weight_decay=0.00036,
+    warmup_epochs=2.0,
+    warmup_momentum=0.5,
+    warmup_bias_lr=0.05
+)
+
+data_aug = dict(
+    hsv_h=0.0138,
+    hsv_s=0.664,
+    hsv_v=0.464,
+    degrees=0.373,
+    translate=0.245,
+    scale=0.898,
+    shear=0.602,
+    flipud=0.00856,
+    fliplr=0.5,
+    mosaic=1.0,
+    mixup=0.243,
+)
\ No newline at end of file
diff --git a/configs/yolov6x_seg.py b/configs/yolov6x_seg.py
index 3ef53e50..355a9e1d 100644
--- a/configs/yolov6x_seg.py
+++ b/configs/yolov6x_seg.py
@@ -1,6 +1,6 @@
-# YOLOv6l-seg model
+# YOLOv6x-seg model
 model = dict(
-    type='YOLOv6l',
+    type='YOLOv6x',
     pretrained=None,
     depth_multiple=1.33,
     width_multiple=1.25,
diff --git a/configs/yolov6x_seg_finetune.py b/configs/yolov6x_seg_finetune.py
new file mode 100644
index 00000000..44abc1d3
--- /dev/null
+++ b/configs/yolov6x_seg_finetune.py
@@ -0,0 +1,72 @@
+# YOLOv6x-seg model
+model = dict(
+    type='YOLOv6x',
+    pretrained='weights/yolov6x_seg.pt',
+    depth_multiple=1.33,
+    width_multiple=1.25,
+    backbone=dict(
+        type='CSPBepBackbone',
+        num_repeats=[1, 6, 12, 18, 6],
+        out_channels=[64, 128, 256, 512, 1024],
+        csp_e=float(1)/2,
+        fuse_P2=True,
+        ),
+    neck=dict(
+        type='CSPRepBiFPANNeck',
+        num_repeats=[12, 12, 12, 12],
+        out_channels=[256, 128, 128, 256, 256, 512],
+        csp_e=float(1)/2,
+        ),
+    head=dict(
+        type='EffiDeHead',
+        in_channels=[128, 256, 512],
+        num_layers=3,
+        begin_indices=24,
+        npr=256,
+        nm=32,
+        isseg=True,
+        issolo=False,
+        anchors=3,
+        anchors_init=[[10,13, 19,19, 33,23],
+                      [30,61, 59,59, 59,119],
+                      [116,90, 185,185, 373,326]],
+        out_indices=[17, 20, 23],
+        strides=[8, 16, 32],
+        atss_warmup_epoch=0,
+        iou_type='giou',
+        use_dfl=True,
+        reg_max=16, #if use_dfl is False, please set reg_max to 0
+        distill_weight={
+            'class': 2.0,
+            'dfl': 1.0,
+        },
+    )
+)
+
+solver = dict(
+    optim='SGD',
+    lr_scheduler='Cosine',
+    lr0=0.0032,
+    lrf=0.12,
+    momentum=0.843,
+    weight_decay=0.00036,
+    warmup_epochs=2.0,
+    warmup_momentum=0.5,
+    warmup_bias_lr=0.05
+)
+
+data_aug = dict(
+    hsv_h=0.0138,
+    hsv_s=0.664,
+    hsv_v=0.464,
+    degrees=0.373,
+    translate=0.245,
+    scale=0.898,
+    shear=0.602,
+    flipud=0.00856,
+    fliplr=0.5,
+    mosaic=1.0,
+    mixup=0.243,
+)
+training_mode = "conv_silu"
+# use normal conv to speed up training and further improve accuracy.