From b1072030a1ff0296e41aaec4fc511e5e372153a3 Mon Sep 17 00:00:00 2001
From: Longjie Zheng <32992656+zhenglongjiepheonix@users.noreply.github.com>
Date: Tue, 31 May 2022 15:08:17 +0800
Subject: [PATCH] amend doc details (#144)

---
 md_doc/inference_with_ncnn.md     |  9 ++++-----
 md_doc/inference_with_ppl_cuda.md |  8 ++++----
 ppq/parser/ncnn_exporter.py       | 11 ++++++++++-
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/md_doc/inference_with_ncnn.md b/md_doc/inference_with_ncnn.md
index 8ebff14c..7705c3bc 100644
--- a/md_doc/inference_with_ncnn.md
+++ b/md_doc/inference_with_ncnn.md
@@ -29,21 +29,21 @@ model_path = '/models/shufflenet-v2-sim.onnx' # onnx simplified model
 data_path = '/data/ImageNet/calibration' # calibration data folder
 EXECUTING_DEVICE = 'cuda'
 
-# initialize dataloader
+# initialize dataloader, suppose preprocessed calibration data is in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
 dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.NCNN_INT8
-setting = QuantizationSettingFactory.academic_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.ncnn_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
@@ -61,8 +61,7 @@ quantizer.quantize(
 # export quantization param file and model file
 export_ppq_graph(graph=ppq_ir_graph, platform=TargetPlatform.NCNN_INT8, graph_save_to='shufflenet-v2-sim-ppq', config_save_to='shufflenet-v2-sim-ppq.table')
 ```
-note that your dataloader should provide batch data which is in the same shape of the input of simplified model, because
-simplified model can't take dynamic-shape inputs.
+note that your dataloader should provide batch data which is in the same shape of the input of simplified model, because simplified model can't take dynamic-shape inputs.
 
 ## Convert Your Model
 if you have compiled ncnn correctly, there should be executables in the installation binary folder which can convert onnx model
diff --git a/md_doc/inference_with_ppl_cuda.md b/md_doc/inference_with_ppl_cuda.md
index 46168d99..6ebede61 100644
--- a/md_doc/inference_with_ppl_cuda.md
+++ b/md_doc/inference_with_ppl_cuda.md
@@ -1,7 +1,7 @@
 # Inference with PPL CUDA
 this tutorial gives you a simple illustration how you could actually use PPQ to quantize your model and export quantization parameter file to inference with ppl cuda as your backend.
 Similar to [inference_with_ncnn](./inference_with_ncnn.md), we use an onnx model, shufflenet-v2, as an example here to illustrate the whole process
-going from ready-to-quantize model to ready-to-deploy model and corresponding quantization parameter
+going from ready-to-quantize model to ready-to-deploy polished onnx model, with quantization parameter
 file generated
 ## Quantize Your Network
 as we have specified in [how_to_use](./how_to_use.md), we should prepare our calibration dataloader, confirm
@@ -21,21 +21,21 @@ model_path = '/models/shufflenet-v2.onnx'
 data_path = '/data/ImageNet/calibration'
 EXECUTING_DEVICE = 'cuda'
 
-# initialize dataloader
+# initialize dataloader, suppose preprocessed input data is in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
 dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.PPL_CUDA_INT8
-setting = QuantizationSettingFactory.pplcuda_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.pplcuda_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
diff --git a/ppq/parser/ncnn_exporter.py b/ppq/parser/ncnn_exporter.py
index 7285759a..2b364b3a 100644
--- a/ppq/parser/ncnn_exporter.py
+++ b/ppq/parser/ncnn_exporter.py
@@ -1,3 +1,4 @@
+import os
 from typing import List
 
 from ppq.core import (DataType, NetworkFramework, QuantizationProperty,
@@ -46,9 +47,17 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, inpu
         if config_path is not None:
             self.export_quantization_config(config_path, graph)
 
+        _, ext = os.path.splitext(file_path)
+        if ext == '.onnx':
+            exporter = OnnxExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None)
+        elif ext in {'.prototxt', '.caffemodel'}:
+            exporter = CaffeExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
+
         # no pre-determined export format, we export according to the
         # original model format
-        if graph._built_from == NetworkFramework.CAFFE:
+        elif graph._built_from == NetworkFramework.CAFFE:
             exporter = CaffeExporter()
             exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
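
As a note for reviewers, the behavioural effect of the `ppq/parser/ncnn_exporter.py` change is that the extension of the path handed to `NCNNExporter.export` now decides the export format, and the original-framework check only runs as a fallback. Below is a minimal standalone sketch of that dispatch, not PPQ code: `pick_exporter` and its string results are hypothetical stand-ins for the real exporter classes, and the final onnx fallback is assumed since that branch lies outside the hunk above.

```python
import os

# Hypothetical stand-in mirroring the extension-based dispatch this patch adds
# to NCNNExporter.export (strings replace the real exporter classes).
def pick_exporter(file_path: str, built_from: str) -> str:
    _, ext = os.path.splitext(file_path)
    if ext == '.onnx':
        return 'OnnxExporter'
    elif ext in {'.prototxt', '.caffemodel'}:
        return 'CaffeExporter'
    # no pre-determined export format, fall back to the original model format
    elif built_from == 'CAFFE':
        return 'CaffeExporter'
    return 'OnnxExporter'  # assumed default for graphs originally built from onnx

# the file suffix now wins over the framework the graph was built from
assert pick_exporter('shufflenet-v2-sim-ppq.onnx', built_from='CAFFE') == 'OnnxExporter'
# with no recognized suffix the behaviour matches the pre-patch fallback
assert pick_exporter('shufflenet-v2-sim-ppq', built_from='CAFFE') == 'CaffeExporter'
```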