Fix quant_format argument for 4bit quantizer (#21581)

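### Description

The original `--quant_format` argument was declared with `type=QuantFormat`, so it expected the enum members `QuantFormat.QOperator` or `QuantFormat.QDQ`, but its default value was the plain string `"QOperator"`. This change declares the argument as `str`, accepting `QOperator` or `QDQ`, and converts the parsed value to `QuantFormat` after parsing.

### Motivation and Context

Bug fix.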
microsoft · Aug 3, 2024 · 8643127 · 8643127
1 parent de81dbe
commit 8643127
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py
@@ -797,8 +797,8 @@ def parse_args():
     parser.add_argument(
         "--quant_format",
         default="QOperator",
-        type=QuantFormat,
-        choices=list(QuantFormat),
+        type=str,
+        choices=["QOperator", "QDQ"],
         help="QuantFormat {QOperator, QDQ}"
         "QOperator format quantizes the model with quantized operators directly."
         "QDQ format quantize the model by inserting DeQuantizeLinear before the MatMul.",
@@ -814,7 +814,7 @@ def parse_args():
 
     input_model_path = args.input_model
     output_model_path = args.output_model
-    quant_format = args.quant_format
+    quant_format = QuantFormat[args.quant_format]
 
     if os.path.exists(output_model_path):
         logger.error(f"file {output_model_path} already exists")