diff --git a/md_doc/inference_with_ncnn.md b/md_doc/inference_with_ncnn.md
index 8ebff14c..7705c3bc 100644
--- a/md_doc/inference_with_ncnn.md
+++ b/md_doc/inference_with_ncnn.md
@@ -29,21 +29,21 @@
 model_path = '/models/shufflenet-v2-sim.onnx' # onnx simplified model
 data_path = '/data/ImageNet/calibration'      # calibration data folder
 EXECUTING_DEVICE = 'cuda'
-# initialize dataloader
+# initialize dataloader, assuming preprocessed calibration data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
-dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
+dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.NCNN_INT8
-setting = QuantizationSettingFactory.academic_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.ncnn_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
@@ -61,8 +61,7 @@ quantizer.quantize(
 # export quantization param file and model file
 export_ppq_graph(graph=ppq_ir_graph, platform=TargetPlatform.NCNN_INT8, graph_save_to='shufflenet-v2-sim-ppq', config_save_to='shufflenet-v2-sim-ppq.table')
 ```
-note that your dataloader should provide batch data which is in the same shape of the input of simplified model, because
-simplified model can't take dynamic-shape inputs.
+note that your dataloader should provide batch data in the same shape as the input of the simplified model, because the simplified model can't take dynamic-shape inputs.
 
 ## Convert Your Model
 if you have compiled ncnn correctly, there should be executables in the installation binary folder which can convert onnx model
diff --git a/md_doc/inference_with_ppl_cuda.md b/md_doc/inference_with_ppl_cuda.md
index 46168d99..6ebede61 100644
--- a/md_doc/inference_with_ppl_cuda.md
+++ b/md_doc/inference_with_ppl_cuda.md
@@ -1,7 +1,7 @@
 # Inference with PPL CUDA
 this tutorial gives you a simple illustration how you could actually use PPQ to quantize your model and export quantization parameter file to inference with ppl cuda as your backend.
 Similar to [inference_with_ncnn](./inference_with_ncnn.md), we use an onnx model, shufflenet-v2, as an example here to illustrate the whole process
-going from ready-to-quantize model to ready-to-deploy model and corresponding quantization parameter
+going from a ready-to-quantize model to a ready-to-deploy polished onnx model, with the corresponding quantization parameter
 file generated
 ## Quantize Your Network
 as we have specified in [how_to_use](./how_to_use.md), we should prepare our calibration dataloader, confirm
@@ -21,21 +21,21 @@
 model_path = '/models/shufflenet-v2.onnx'
 data_path = '/data/ImageNet/calibration'
 EXECUTING_DEVICE = 'cuda'
-# initialize dataloader
+# initialize dataloader, assuming preprocessed input data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
-dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
+dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.PPL_CUDA_INT8
-setting = QuantizationSettingFactory.pplcuda_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.pplcuda_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
diff --git a/ppq/parser/ncnn_exporter.py b/ppq/parser/ncnn_exporter.py
index 7285759a..2b364b3a 100644
--- a/ppq/parser/ncnn_exporter.py
+++ b/ppq/parser/ncnn_exporter.py
@@ -1,3 +1,4 @@
+import os
 from typing import List
 
 from ppq.core import (DataType, NetworkFramework, QuantizationProperty,
@@ -46,9 +47,17 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, inpu
         if config_path is not None:
             self.export_quantization_config(config_path, graph)
 
+        _, ext = os.path.splitext(file_path)
+        if ext == '.onnx':
+            exporter = OnnxExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None)
+        elif ext in {'.prototxt', '.caffemodel'}:
+            exporter = CaffeExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
+
         # no pre-determined export format, we export according to the
         # original model format
-        if graph._built_from == NetworkFramework.CAFFE:
+        elif graph._built_from == NetworkFramework.CAFFE:
             exporter = CaffeExporter()
             exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
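
Usage note for the ncnn_exporter.py change above: with the extension dispatch in place, the file extension of the path handed to the exporter selects the exported graph format. The sketch below is illustrative only and not part of this diff; it assumes the `from ppq.api import export_ppq_graph` and `from ppq.core import TargetPlatform` import paths, that `graph_save_to` is forwarded to the exporter as `file_path`, and that `ppq_ir_graph` is the quantized graph produced by `quantizer.quantize(...)` in the tutorial snippets above.

```python
# illustrative sketch, not part of this diff: pick the exported graph format
# via the file extension handled by the new dispatch in NCNNExporter.export
from ppq.core import TargetPlatform          # assumed import path
from ppq.api import export_ppq_graph         # assumed import path

# '.onnx' routes to OnnxExporter, '.prototxt'/'.caffemodel' to CaffeExporter;
# any other extension falls back to the framework the graph was built from.
export_ppq_graph(
    graph=ppq_ir_graph,                              # quantized graph from quantizer.quantize(...)
    platform=TargetPlatform.NCNN_INT8,
    graph_save_to='shufflenet-v2-sim-ppq.onnx',      # explicit .onnx -> OnnxExporter
    config_save_to='shufflenet-v2-sim-ppq.table')    # ncnn quantization table
```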