diff --git a/md_doc/inference_with_ncnn.md b/md_doc/inference_with_ncnn.md
index 8ebff14c..7705c3bc 100644
--- a/md_doc/inference_with_ncnn.md
+++ b/md_doc/inference_with_ncnn.md
@@ -29,21 +29,21 @@
 model_path = '/models/shufflenet-v2-sim.onnx' # onnx simplified model
 data_path = '/data/ImageNet/calibration'      # calibration data folder
 EXECUTING_DEVICE = 'cuda'
-# initialize dataloader
+# initialize dataloader, assuming preprocessed calibration data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
-dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
+dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.NCNN_INT8
-setting = QuantizationSettingFactory.academic_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.ncnn_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
@@ -61,8 +61,7 @@ quantizer.quantize(
 # export quantization param file and model file
 export_ppq_graph(graph=ppq_ir_graph, platform=TargetPlatform.NCNN_INT8, graph_save_to='shufflenet-v2-sim-ppq', config_save_to='shufflenet-v2-sim-ppq.table')
 ```
-note that your dataloader should provide batch data which is in the same shape of the input of simplified model, because
-simplified model can't take dynamic-shape inputs.
+note that your dataloader should provide batch data in the same shape as the input of the simplified model, because the simplified model can't take dynamic-shape inputs.
 
 ## Convert Your Model
 if you have compiled ncnn correctly, there should be executables in the installation binary folder which can convert onnx model
diff --git a/md_doc/inference_with_ppl_cuda.md b/md_doc/inference_with_ppl_cuda.md
index 46168d99..6ebede61 100644
--- a/md_doc/inference_with_ppl_cuda.md
+++ b/md_doc/inference_with_ppl_cuda.md
@@ -1,7 +1,7 @@
 # Inference with PPL CUDA
 this tutorial gives you a simple illustration how you could actually use PPQ to quantize your model and export quantization parameter file to inference with ppl cuda as your backend.
 Similar to [inference_with_ncnn](./inference_with_ncnn.md), we use an onnx model, shufflenet-v2, as an example here to illustrate the whole process
-going from ready-to-quantize model to ready-to-deploy model and corresponding quantization parameter
+going from a ready-to-quantize model to a ready-to-deploy polished onnx model, with the corresponding quantization parameter
 file generated
 ## Quantize Your Network
 as we have specified in [how_to_use](./how_to_use.md), we should prepare our calibration dataloader, confirm
@@ -21,21 +21,21 @@
 model_path = '/models/shufflenet-v2.onnx'
 data_path = '/data/ImageNet/calibration'
 EXECUTING_DEVICE = 'cuda'
-# initialize dataloader
+# initialize dataloader, assuming preprocessed input data is stored in binary format
 INPUT_SHAPE = [1, 3, 224, 224]
 npy_array = [np.fromfile(os.path.join(data_path, file_name), dtype=np.float32).reshape(*INPUT_SHAPE) for file_name in os.listdir(data_path)]
-dataloader = [torch.from_numpy(np.load(npy_tensor)) for npy_tensor in npy_array]
+dataloader = [torch.from_numpy(npy_tensor) for npy_tensor in npy_array]
 
 # confirm platform and setting
 target_platform = TargetPlatform.PPL_CUDA_INT8
-setting = QuantizationSettingFactory.pplcuda_setting() # for ncnn, no fusion
+setting = QuantizationSettingFactory.pplcuda_setting()
 
 # load and schedule graph
 ppq_graph_ir = load_onnx_graph(model_path)
 ppq_graph_ir = dispatch_graph(ppq_graph_ir, target_platform, setting)
 
 # intialize quantizer and executor
-executor = TorchExecutor(ppq_graph_ir, device='cuda')
+executor = TorchExecutor(ppq_graph_ir, device=EXECUTING_DEVICE)
 quantizer = QUANTIZER_COLLECTION[target_platform](graph=ppq_graph_ir)
 
 # run quantization
diff --git a/ppq/parser/ncnn_exporter.py b/ppq/parser/ncnn_exporter.py
index 7285759a..2b364b3a 100644
--- a/ppq/parser/ncnn_exporter.py
+++ b/ppq/parser/ncnn_exporter.py
@@ -1,3 +1,4 @@
+import os
 from typing import List
 
 from ppq.core import (DataType, NetworkFramework, QuantizationProperty,
@@ -46,9 +47,17 @@ def export(self, file_path: str, graph: BaseGraph, config_path: str = None, inpu
         if config_path is not None:
             self.export_quantization_config(config_path, graph)
 
+        _, ext = os.path.splitext(file_path)
+        if ext == '.onnx':
+            exporter = OnnxExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None)
+        elif ext in {'.prototxt', '.caffemodel'}:
+            exporter = CaffeExporter()
+            exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
+
         # no pre-determined export format, we export according to the
         # original model format
-        if graph._built_from == NetworkFramework.CAFFE:
+        elif graph._built_from == NetworkFramework.CAFFE:
             exporter = CaffeExporter()
             exporter.export(file_path=file_path, graph=graph, config_path=None, input_shapes=input_shapes)
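
Usage note for the ncnn_exporter.py change above: with the extension dispatch in place, the file extension of the path handed to the exporter selects the exported graph format. The sketch below is illustrative only and not part of this diff; it assumes the `from ppq.api import export_ppq_graph` and `from ppq.core import TargetPlatform` import paths, that `graph_save_to` is forwarded to the exporter as `file_path`, and that `ppq_ir_graph` is the quantized graph produced by `quantizer.quantize(...)` in the tutorial snippets above.

```python
# illustrative sketch, not part of this diff: pick the exported graph format
# via the file extension handled by the new dispatch in NCNNExporter.export
from ppq.core import TargetPlatform          # assumed import path
from ppq.api import export_ppq_graph         # assumed import path

# '.onnx' routes to OnnxExporter, '.prototxt'/'.caffemodel' to CaffeExporter;
# any other extension falls back to the framework the graph was built from.
export_ppq_graph(
    graph=ppq_ir_graph,                              # quantized graph from quantizer.quantize(...)
    platform=TargetPlatform.NCNN_INT8,
    graph_save_to='shufflenet-v2-sim-ppq.onnx',      # explicit .onnx -> OnnxExporter
    config_save_to='shufflenet-v2-sim-ppq.table')    # ncnn quantization table
```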