From eb21c0e278aa9cbaabe2fb7b2c7028a20b91346b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 24 Jan 2024 14:15:22 +0100 Subject: [PATCH 1/7] Add missing dependency onnxconverter_common, fix multi regression with xgboost (#679) * add missing dependency onnxconverter_common Signed-off-by: Xavier Dupre * issue 676 Signed-off-by: Xavier Dupre * issue 676 Signed-off-by: Xavier Dupre * fix issue 676 Signed-off-by: Xavier Dupre * fix shape calculator Signed-off-by: Xavier Dupre * fix new name for sparse argument Signed-off-by: Xavier Dupre * changelogs Signed-off-by: Xavier Dupre * improves stability Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre --- CHANGELOGS.md | 7 +++ onnxmltools/__init__.py | 2 +- onnxmltools/convert/xgboost/common.py | 6 ++ .../xgboost/operator_converters/XGBoost.py | 3 + requirements.txt | 1 + .../test_xgboost_converters_base_score.py | 2 +- tests/xgboost/test_xgboost_issues.py | 55 +++++++++++++++++++ tests/xgboost/test_xgboost_pipeline.py | 2 +- 8 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 tests/xgboost/test_xgboost_issues.py diff --git a/CHANGELOGS.md b/CHANGELOGS.md index 1fe94578..6598aac4 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -1,5 +1,12 @@ # Change Logs +## 1.13.0 (development) + +* Add missing dependency onnxconverter_common, fix multi regression with xgboost, + [#679](https://github.com/onnx/onnxmltools/pull/679), + fixes issues [No module named 'onnxconverter_common'](https://github.com/onnx/onnxmltools/issues/673), + [onnx converted : xgboostRegressor multioutput model predicts 1 dimension instead of original 210 dimensions.](https://github.com/onnx/onnxmltools/issues/676) + ## 1.12.0 * Fix early stopping for XGBClassifier and xgboost > 2 diff --git a/onnxmltools/__init__.py b/onnxmltools/__init__.py index 7015cd04..b898fe11 100644 --- a/onnxmltools/__init__.py +++ b/onnxmltools/__init__.py @@ -5,7 +5,7 @@ This framework converts any machine learned model into onnx format which is a common language to describe any machine learned model.
""" -__version__ = "1.12.0" +__version__ = "1.13.0" __author__ = "ONNX" __producer__ = "OnnxMLTools" __producer_version__ = __version__ diff --git a/onnxmltools/convert/xgboost/common.py b/onnxmltools/convert/xgboost/common.py index 3a79aaad..4a10a48f 100644 --- a/onnxmltools/convert/xgboost/common.py +++ b/onnxmltools/convert/xgboost/common.py @@ -32,6 +32,12 @@ def get_xgb_params(xgb_node): bs = float(config["learner"]["learner_model_param"]["base_score"]) # xgboost >= 2.0 params["base_score"] = bs + if "num_target" in config["learner"]["learner_model_param"]: + params["n_targets"] = int( + config["learner"]["learner_model_param"]["num_target"] + ) + else: + params["n_targets"] = 1 bst = xgb_node.get_booster() if hasattr(bst, "best_ntree_limit"): diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py index 904630b1..d241f186 100644 --- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py +++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py @@ -254,6 +254,9 @@ def convert(scope, operator, container): js_trees, attr_pairs, [1 for _ in js_trees], False ) + params = XGBConverter.get_xgb_params(xgb_node) + attr_pairs["n_targets"] = params["n_targets"] + # add nodes if objective == "count:poisson": names = [scope.get_unique_variable_name("tree")] diff --git a/requirements.txt b/requirements.txt index 308201c6..5362e17d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy onnx +onnxconverter_common diff --git a/tests/xgboost/test_xgboost_converters_base_score.py b/tests/xgboost/test_xgboost_converters_base_score.py index 27d40172..528560d8 100644 --- a/tests/xgboost/test_xgboost_converters_base_score.py +++ b/tests/xgboost/test_xgboost_converters_base_score.py @@ -103,7 +103,7 @@ def test_xgbclassifier_sparse_base_score(self): assert_almost_equal(expected.reshape((-1, 2)), got, decimal=4) def test_xgbclassifier_sparse_no_base_score(self): - X, y = make_regression(n_samples=200, n_features=10, random_state=0) + X, y = make_regression(n_samples=400, n_features=10, random_state=0) mask = np.random.randint(0, 50, size=(X.shape)) != 0 X[mask] = 0 y = (y + mask.sum(axis=1, keepdims=0)).astype(np.float32) diff --git a/tests/xgboost/test_xgboost_issues.py b/tests/xgboost/test_xgboost_issues.py new file mode 100644 index 00000000..fac5a8f0 --- /dev/null +++ b/tests/xgboost/test_xgboost_issues.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: Apache-2.0 + +import unittest + + +class TestXGBoostIssues(unittest.TestCase): + def test_issue_676(self): + import json + import onnxruntime + import xgboost + import numpy as np + from skl2onnx import convert_sklearn + from skl2onnx.common.data_types import FloatTensorType + from skl2onnx import update_registered_converter + from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( + convert_xgboost, + ) + + def xgbregressor_shape_calculator(operator): + config = json.loads(operator.raw_operator.get_booster().save_config()) + n_targets = int(config["learner"]["learner_model_param"]["num_target"]) + operator.outputs[0].type.shape = [None, n_targets] + + update_registered_converter( + xgboost.XGBRegressor, + "XGBoostXGBRegressor", + xgbregressor_shape_calculator, + convert_xgboost, + ) + # Your data and labels + X = np.random.rand(100, 10) + y = np.random.rand(100, 2) + + # Train XGBoost regressor + model = xgboost.XGBRegressor( + objective="reg:squarederror", n_estimators=2, maxdepth=2 + ) + model.fit(X, y) + + # Define input type 
(adjust shape according to your input) + initial_type = [("float_input", FloatTensorType([None, X.shape[1]]))] + + # Convert XGBoost model to ONNX + onnx_model = convert_sklearn(model, initial_types=initial_type, target_opset=12) + self.assertIn("dim_value: 2", str(onnx_model.graph.output)) + + sess = onnxruntime.InferenceSession( + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"float_input": X.astype(np.float32)}) + self.assertEqual(got[0].shape, (100, 2)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/xgboost/test_xgboost_pipeline.py b/tests/xgboost/test_xgboost_pipeline.py index ffc7bfc2..1df296f6 100644 --- a/tests/xgboost/test_xgboost_pipeline.py +++ b/tests/xgboost/test_xgboost_pipeline.py @@ -50,7 +50,7 @@ def transformer_for_column(column): if column.dtype in ["bool"]: return "passthrough" if column.dtype in ["O"]: - return OneHotEncoder(sparse=False) + return OneHotEncoder(sparse_output=False) raise ValueError() return ColumnTransformer( From b78899975aafe9c7072bd1c0f1d8e373cec722ce Mon Sep 17 00:00:00 2001 From: xkszltl Date: Tue, 2 Apr 2024 05:41:34 -0700 Subject: [PATCH 2/7] Fix pkg name of onnxconverter_common (#683) * Fix pkg name of onnxconverter_common The pkg is dashed while the lib is underscored Signed-off-by: xkszltl * Update requirements-dev.txt Signed-off-by: xkszltl --------- Signed-off-by: xkszltl --- requirements-dev.txt | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index cfd46ddc..1ba960b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,6 +7,7 @@ lightgbm mleap numpy openpyxl +onnxconverter-common pandas pyspark pytest diff --git a/requirements.txt b/requirements.txt index 5362e17d..b1b42f87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy onnx -onnxconverter_common +onnxconverter-common From 4c0f6dba1a58fe26133af6b229b2f1e9a31e84a5 Mon Sep 17 00:00:00 2001 From: xiaowuhu Date: Tue, 2 Apr 2024 20:42:09 +0800 Subject: [PATCH 3/7] update ort to 117 (#682) Signed-off-by: xiaowuhu --- .azure-pipelines/linux-conda-CI.yml | 10 ++++++++++ .azure-pipelines/win32-conda-CI.yml | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index b45760ba..8388d6ee 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -15,6 +15,16 @@ jobs: strategy: matrix: + Python311-1150-RT117-xgb2-lgbm40: + python.version: '3.11' + ONNX_PATH: 'onnx==1.15.0' + ONNXRT_PATH: 'onnxruntime==1.17' + COREML_PATH: NONE + lightgbm.version: '>=4.0' + xgboost.version: '>=2' + numpy.version: '' + scipy.version: '' + Python311-1150-RT1163-xgb2-lgbm40: python.version: '3.11' ONNX_PATH: 'onnx==1.15.0' diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index f3a42ba5..c87f9491 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -15,6 +15,14 @@ jobs: strategy: matrix: + Python311-1150-RT117: + python.version: '3.11' + ONNX_PATH: 'onnx==1.15.0' + ONNXRT_PATH: 'onnxruntime==1.17' + COREML_PATH: NONE + numpy.version: '' + xgboost.version: '2.0.2' + Python311-1150-RT1163: python.version: '3.11' ONNX_PATH: 'onnx==1.15.0' From b1e1068f5c12e072386d107246c0d4ac7c1ff56d Mon Sep 17 00:00:00 2001 From: Gerard Casas Saez Date: Tue, 2 Apr 2024 06:46:14 -0600 Subject: [PATCH 4/7] handle better binary classifier class count for 
boosters (#681) --- .../convert/xgboost/shape_calculators/Classifier.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py index e44d5a52..c64d9743 100644 --- a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py +++ b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py @@ -27,12 +27,12 @@ def calculate_xgboost_classifier_output_shapes(operator): objective = params["objective"] n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees) num_class = params.get("num_class", None) - - if num_class is not None: + + if objective == "binary:logistic": + ncl = 2 + elif num_class is not None: ncl = num_class n_estimators = ntrees // ncl - elif objective == "binary:logistic": - ncl = 2 else: ncl = ntrees // n_estimators if objective == "reg:logistic" and ncl == 1: @@ -46,7 +46,7 @@ def calculate_xgboost_classifier_output_shapes(operator): operator.outputs[0].type = Int64TensorType(shape=[N]) else: operator.outputs[0].type = StringTensorType(shape=[N]) - operator.outputs[1].type = operator.outputs[1].type = FloatTensorType([N, ncl]) + operator.outputs[1].type = FloatTensorType([N, ncl]) register_shape_calculator("XGBClassifier", calculate_xgboost_classifier_output_shapes) From a105247903398c9c2e26906af447d90cb8f380b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 3 Apr 2024 11:51:41 +0200 Subject: [PATCH 5/7] Upgrade CI to onnx==1.16.0 (#689) * upgrade to onnx==1.16.0 Signed-off-by: Xavier Dupre * doc Signed-off-by: Xavier Dupre * black Signed-off-by: Xavier Dupre * black Signed-off-by: Xavier Dupre * fix opset Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre --- .azure-pipelines/linux-conda-CI.yml | 6 +-- .azure-pipelines/win32-conda-CI.yml | 22 ++++----- .github/workflows/black-ruff.yml | 2 +- CHANGELOGS.md | 2 + .../operator_converters/TreeEnsemble.py | 18 +++---- .../convert/sparkml/ops_input_output.py | 48 ++++++++++++------- .../xgboost/shape_calculators/Classifier.py | 2 +- pyproject.toml | 4 +- tests/xgboost/test_xgboost_issues.py | 4 +- 9 files changed, 62 insertions(+), 46 deletions(-) diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 8388d6ee..f1d95382 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -15,10 +15,10 @@ jobs: strategy: matrix: - Python311-1150-RT117-xgb2-lgbm40: + Python311-1160-RT1171-xgb2-lgbm40: python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.17' + ONNX_PATH: 'onnx==1.16.0' + ONNXRT_PATH: 'onnxruntime==1.17.1' COREML_PATH: NONE lightgbm.version: '>=4.0' xgboost.version: '>=2' diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index c87f9491..4fbe704d 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -15,13 +15,13 @@ jobs: strategy: matrix: - Python311-1150-RT117: + Python311-1160-RT1171: python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.17' + ONNX_PATH: 'onnx==1.16.0' + ONNXRT_PATH: 'onnxruntime==1.17.1' COREML_PATH: NONE numpy.version: '' - xgboost.version: '2.0.2' + xgboost.version: '>=2.0' Python311-1150-RT1163: python.version: '3.11' @@ -29,7 +29,7 @@ jobs: ONNXRT_PATH: 'onnxruntime==1.16.3' COREML_PATH: NONE numpy.version: '' - xgboost.version: '2.0.2' + xgboost.version: '==2.0.2' Python311-1150-RT1162: python.version: 
'3.11' @@ -37,7 +37,7 @@ jobs: ONNXRT_PATH: 'onnxruntime==1.16.2' COREML_PATH: NONE numpy.version: '' - xgboost.version: '2.0.2' + xgboost.version: '==2.0.2' Python311-1141-RT1162: python.version: '3.11' @@ -45,7 +45,7 @@ jobs: ONNXRT_PATH: 'onnxruntime==1.16.2' COREML_PATH: NONE numpy.version: '' - xgboost.version: '1.7.5' + xgboost.version: '==1.7.5' Python310-1141-RT1151: python.version: '3.10' @@ -53,7 +53,7 @@ jobs: ONNXRT_PATH: 'onnxruntime==1.15.1' COREML_PATH: NONE numpy.version: '' - xgboost.version: '1.7.5' + xgboost.version: '==1.7.5' Python310-1141-RT1140: python.version: '3.10' @@ -61,7 +61,7 @@ jobs: ONNXRT_PATH: onnxruntime==1.14.0 COREML_PATH: NONE numpy.version: '' - xgboost.version: '1.7.5' + xgboost.version: '==1.7.5' Python39-1141-RT1140: python.version: '3.9' @@ -69,7 +69,7 @@ jobs: ONNXRT_PATH: onnxruntime==1.14.0 COREML_PATH: NONE numpy.version: '' - xgboost.version: '1.7.5' + xgboost.version: '==1.7.5' maxParallel: 3 @@ -96,7 +96,7 @@ jobs: call activate py$(python.version) python -m pip install --upgrade scikit-learn python -m pip install --upgrade lightgbm - python -m pip install "xgboost==$(xgboost.version)" + python -m pip install "xgboost$(xgboost.version)" displayName: 'Install scikit-learn' - script: | diff --git a/.github/workflows/black-ruff.yml b/.github/workflows/black-ruff.yml index 09da3fc3..dc846df3 100644 --- a/.github/workflows/black-ruff.yml +++ b/.github/workflows/black-ruff.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@193ee766ca496871f93621d6b58d57a6564ff81b # stable 23.7.0 + - uses: psf/black@e42f1ffbe159018c8e887f775c0fd7b51b3cf787 # stable 24.3.0 with: options: "--diff --check" src: "." diff --git a/CHANGELOGS.md b/CHANGELOGS.md index 6598aac4..2920b296 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -2,6 +2,8 @@ ## 1.13.0 (development) +* Handle issue with binary classifier setting output to [N,1] vs [N,2], + [#681](https://github.com/onnx/onnxmltools/pull/681) * Add missing dependency onnxconverter_common, fix multi regression with xgboost, [#679](https://github.com/onnx/onnxmltools/pull/679), fixes issues [No module named 'onnxconverter_common'](https://github.com/onnx/onnxmltools/issues/673), diff --git a/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py b/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py index db52cc01..6dda3fbf 100644 --- a/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py +++ b/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py @@ -45,9 +45,9 @@ def convert_tree_ensemble_model(scope, operator, container): op_type = "TreeEnsembleClassifier" prefix = "class" nodes = raw_model.treeEnsembleClassifier.treeEnsemble.nodes - attrs[ - "base_values" - ] = raw_model.treeEnsembleClassifier.treeEnsemble.basePredictionValue + attrs["base_values"] = ( + raw_model.treeEnsembleClassifier.treeEnsemble.basePredictionValue + ) attrs["post_transform"] = get_onnx_tree_post_transform( raw_model.treeEnsembleClassifier.postEvaluationTransform ) @@ -72,12 +72,12 @@ def convert_tree_ensemble_model(scope, operator, container): op_type = "TreeEnsembleRegressor" prefix = "target" nodes = raw_model.treeEnsembleRegressor.treeEnsemble.nodes - attrs[ - "base_values" - ] = raw_model.treeEnsembleRegressor.treeEnsemble.basePredictionValue - attrs[ - "n_targets" - ] = raw_model.treeEnsembleRegressor.treeEnsemble.numPredictionDimensions + attrs["base_values"] = ( + raw_model.treeEnsembleRegressor.treeEnsemble.basePredictionValue + ) + 
attrs["n_targets"] = ( + raw_model.treeEnsembleRegressor.treeEnsemble.numPredictionDimensions + ) attrs["post_transform"] = get_onnx_tree_post_transform( raw_model.treeEnsembleRegressor.postEvaluationTransform ) diff --git a/onnxmltools/convert/sparkml/ops_input_output.py b/onnxmltools/convert/sparkml/ops_input_output.py index ae241667..d74f95dd 100644 --- a/onnxmltools/convert/sparkml/ops_input_output.py +++ b/onnxmltools/convert/sparkml/ops_input_output.py @@ -134,12 +134,16 @@ def build_io_name_map(): lambda model: [model.getOrDefault("predictionCol")], ), "pyspark.ml.feature.ImputerModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.MaxAbsScalerModel": ( lambda model: [model.getOrDefault("inputCol")], @@ -177,20 +181,28 @@ def build_io_name_map(): ], ), "pyspark.ml.feature.OneHotEncoderModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.StringIndexerModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.VectorAssembler": ( lambda model: model.getOrDefault("inputCols"), diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py index c64d9743..c245633f 100644 --- a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py +++ b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py @@ -27,7 +27,7 @@ def calculate_xgboost_classifier_output_shapes(operator): objective = params["objective"] n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees) num_class = params.get("num_class", None) - + if objective == "binary:logistic": ncl = 2 elif num_class is not None: diff --git a/pyproject.toml b/pyproject.toml index 79f48d06..ae0a2b72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,10 +10,10 @@ exclude = [ # Same as Black. 
line-length = 95 -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 10 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "**/__init__.py" = ["F401"] "onnxmltools/convert/coreml/operator_converters/GLMClassifier.py" = ["E501"] "onnxmltools/convert/coreml/operator_converters/SVC.py" = ["E501"] diff --git a/tests/xgboost/test_xgboost_issues.py b/tests/xgboost/test_xgboost_issues.py index fac5a8f0..47a2e08b 100644 --- a/tests/xgboost/test_xgboost_issues.py +++ b/tests/xgboost/test_xgboost_issues.py @@ -41,7 +41,9 @@ def xgbregressor_shape_calculator(operator): initial_type = [("float_input", FloatTensorType([None, X.shape[1]]))] # Convert XGBoost model to ONNX - onnx_model = convert_sklearn(model, initial_types=initial_type, target_opset=12) + onnx_model = convert_sklearn( + model, initial_types=initial_type, target_opset={"": 12, "ai.onnx.ml": 3} + ) self.assertIn("dim_value: 2", str(onnx_model.graph.output)) sess = onnxruntime.InferenceSession( From aa26b73a960f5084908ea893b2b82a1b671dae0a Mon Sep 17 00:00:00 2001 From: tiago-rib-goncalves <159172975+tiago-rib-goncalves@users.noreply.github.com> Date: Fri, 17 May 2024 17:15:15 +0200 Subject: [PATCH 6/7] Update tree_ensemble_common.py (#691) See issue #688 Signed-off-by: tiago-rib-goncalves <159172975+tiago-rib-goncalves@users.noreply.github.com> --- .../convert/sparkml/operator_converters/tree_ensemble_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py index 2e19c7c3..a31a7dc4 100644 --- a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py +++ b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py @@ -28,7 +28,7 @@ def sparkml_tree_dataset_to_sklearn(tree_df, is_classifier): if isinstance(item, dict): try: feature.append(item["featureIndex"]) - threshold.append(item["leftCategoriesOrThreshold"]) + threshold.append(item["leftCategoriesOrThreshold"][0] if len(item["leftCategoriesOrThreshold"]) >= 1 else -1.0) except KeyError: raise RuntimeError(f"Unable to process {item}.") else: From 3ae696aa763c7b25c2587c48441e47a4d0926a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 22 May 2024 14:44:31 +0200 Subject: [PATCH 7/7] Extend CI to test with onnxruntime==1.18.0 (#692) * Extend CI to test with onnxruntime==1.18.0 Signed-off-by: Xavier Dupre * style Signed-off-by: Xavier Dupre * restore trigger on main Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * doc Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * remove unnecessary steps Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix issue with python 3.12 Signed-off-by: Xavier Dupre * fix Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * pin version for scikit-learn Signed-off-by: Xavier Dupre * skl Signed-off-by: Xavier Dupre * mac Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * precision Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre --- .azure-pipelines/linux-conda-CI.yml | 203 ------------------ .azure-pipelines/win32-conda-CI.yml | 192 ----------------- .github/workflows/ci.yml | 144 +++++++++++++ README.md | 10 +- 
onnxmltools/convert/README.md | 2 +- .../tree_ensemble_common.py | 6 +- .../xgboost/operator_converters/XGBoost.py | 8 +- onnxmltools/proto/__init__.py | 8 +- onnxmltools/utils/tests_helper.py | 16 +- requirements-dev.txt | 4 +- tests/xgboost/test_xgboost_converters.py | 2 +- 11 files changed, 174 insertions(+), 421 deletions(-) delete mode 100644 .azure-pipelines/linux-conda-CI.yml delete mode 100644 .azure-pipelines/win32-conda-CI.yml create mode 100644 .github/workflows/ci.yml diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml deleted file mode 100644 index f1d95382..00000000 --- a/.azure-pipelines/linux-conda-CI.yml +++ /dev/null @@ -1,203 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: -- master - -jobs: - -- job: 'Test' - timeoutInMinutes: 30 - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - - Python311-1160-RT1171-xgb2-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.16.0' - ONNXRT_PATH: 'onnxruntime==1.17.1' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=2' - numpy.version: '' - scipy.version: '' - - Python311-1150-RT1163-xgb2-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=2' - numpy.version: '' - scipy.version: '' - - Python311-1150-RT1163-xgb175-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python311-1141-RT1162-xgb175-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python310-1141-RT1151-xgb175: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - lightgbm.version: '<4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python310-1141-RT1140-xgb175: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.14.0' - COREML_PATH: NONE - lightgbm.version: '<4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python39-1141-RT1151-xgb175-scipy180: - python.version: '3.9' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '==1.8.0' - - - maxParallel: 3 - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - architecture: 'x64' - - - script: | - python -m pip install --upgrade pip - pip install $(ONNX_PATH) $(ONNXRT_PATH) cython - pip install -r requirements.txt - displayName: 'Install dependencies' - - - script: | - pip install -r requirements-dev.txt - displayName: 'Install dependencies-dev' - - - script: | - python -m pip install --upgrade pip - pip install "xgboost$(xgboost.version)" - pip install "lightgbm$(lightgbm.version)" - pip install $(ONNX_PATH) - pip install $(ONNXRT_PATH) - pip install "numpy$(numpy.version)" - pip install "scipy$(scipy.version)" - displayName: 'Install xgboost, onnxruntime' - - - script: | 
- python -m pip install coloredlogs flatbuffers packaging sympy numpy protobuf - python -m pip install $(ONNXRT_PATH) - displayName: 'Install onnxruntime' - - - script: | - pip install -e . - displayName: 'local installation' - - - script: | - export PYTHONPATH=. - python -c "import onnxruntime;print('onnx:',onnx.__version__)" - python -c "import onnxconverter_common;print('cc:',onnxconverter_common.__version__)" - python -c "import onnx;print('onnx:',onnx.__version__)" - python -c "import onnxruntime;print('ort:',onnxruntime.__version__)" - python -c "import xgboost;print('xgboost:',xgboost.__version__)" - python -c "import lightgbm;print('lightgbm:',lightgbm.__version__)" - displayName: 'version' - - - script: | - export PYTHONPATH=. - pytest tests/baseline --durations=0 - displayName: 'pytest - baseline' - - - script: | - export PYTHONPATH=. - pytest tests/catboost --durations=0 - displayName: 'pytest - catboost' - - - script: | - export PYTHONPATH=. - pytest tests/lightgbm --durations=0 - displayName: 'pytest - lightgbm' - - - script: | - export PYTHONPATH=. - pytest tests/sparkml --durations=0 - displayName: 'pytest - sparkml' - - - script: | - export PYTHONPATH=. - pytest tests/utils --durations=0 - displayName: 'pytest - utils' - - - script: | - export PYTHONPATH=. - pytest tests/xgboost --durations=0 - displayName: 'pytest - xgboost' - - - script: | - export PYTHONPATH=. - pip install h2o - pytest tests/h2o --durations=0 - displayName: 'pytest - h2o' - - - script: | - export PYTHONPATH=. - pytest tests/svmlib --durations=0 - displayName: 'pytest - svmlib' - - - script: | - pip install torch --extra-index-url https://download.pytorch.org/whl/cpu - pip install hummingbird-ml --no-deps - displayName: 'Install hummingbird-ml' - - - script: | - export PYTHONPATH=. - pytest tests/hummingbirdml --durations=0 - displayName: 'pytest - hummingbirdml' - - - script: | - if [ '$(COREML_PATH)' == 'NONE' ] - then - echo "required version of coremltools does not work on python 3.10" - else - export PYTHONPATH=. - pip install $(COREML_PATH) - pytest tests/coreml --durations=0 - fi - displayName: 'pytest - coreml [$(COREML_PATH)]' - # condition: ne('$(COREML_PATH)', 'NONE') - - - task: PublishTestResults@2 - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version)' - condition: succeededOrFailed() diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml deleted file mode 100644 index 4fbe704d..00000000 --- a/.azure-pipelines/win32-conda-CI.yml +++ /dev/null @@ -1,192 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. 
-# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: -- master - -jobs: - -- job: 'Test' - timeoutInMinutes: 30 - pool: - vmImage: 'windows-latest' - strategy: - matrix: - - Python311-1160-RT1171: - python.version: '3.11' - ONNX_PATH: 'onnx==1.16.0' - ONNXRT_PATH: 'onnxruntime==1.17.1' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '>=2.0' - - Python311-1150-RT1163: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==2.0.2' - - Python311-1150-RT1162: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==2.0.2' - - Python311-1141-RT1162: - python.version: '3.11' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==1.7.5' - - Python310-1141-RT1151: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==1.7.5' - - Python310-1141-RT1140: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: onnxruntime==1.14.0 - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==1.7.5' - - Python39-1141-RT1140: - python.version: '3.9' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: onnxruntime==1.14.0 - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '==1.7.5' - - maxParallel: 3 - - steps: - - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Add conda to PATH - - - script: conda create --yes --quiet --name py$(python.version) -c conda-forge python=$(python.version) numpy protobuf scikit-learn scipy cython - displayName: Create Anaconda environment - - - script: | - call activate py$(python.version) - python -m pip install --upgrade pip numpy - echo Test numpy installation... && python -c "import numpy" - python -m pip install -r requirements.txt - displayName: 'Install dependencies (1)' - - - script: | - call activate py$(python.version) - python -m pip install -r requirements-dev.txt - displayName: 'Install dependencies-dev' - - - script: | - call activate py$(python.version) - python -m pip install --upgrade scikit-learn - python -m pip install --upgrade lightgbm - python -m pip install "xgboost$(xgboost.version)" - displayName: 'Install scikit-learn' - - - script: | - call activate py$(python.version) - python -m pip install %ONNX_PATH% - python -m pip install %ONNXRT_PATH% - python -m pip install "numpy$(numpy.version)" - displayName: 'Install onnxruntime' - - - script: | - call activate py$(python.version) - python -m pip install coloredlogs flatbuffers packaging sympy numpy protobuf - python -m pip install $(ONNXRT_PATH) - displayName: 'Install ort-nightly' - - - script: | - call activate py$(python.version) - python -m pip install -e . - export PYTHONPATH=. - python -c "import onnxconverter_common;print(onnxconverter_common.__version__)" - python -c "import onnx;print(onnx.__version__)" - python -c "import onnxruntime;print(onnxruntime.__version__)" - python -c "import xgboost;print(xgboost.__version__)" - python -c "import lightgbm;print(lightgbm.__version__)" - displayName: 'version' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. 
- python -m pytest tests/baseline --durations=0 - displayName: 'pytest baseline' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/catboost --durations=0 - displayName: 'pytest catboost' - - - script: | - call activate py$(python.version) - set PYTHONPATH=. - if "$(COREML_PATH)" neq "NONE" python -m pip install %COREML_PATH% - if "$(COREML_PATH)" neq "NONE" python -m pytest tests/coreml --durations=0 - displayName: 'pytest coreml - [$(COREML_PATH)]' - #condition: ne('$(COREML_PATH)', 'NONE') - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/lightgbm --durations=0 - displayName: 'pytest lightgbm' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/sparkml --durations=0 - displayName: 'pytest sparkml' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/svmlib --durations=0 - displayName: 'pytest svmlib' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/utils --durations=0 - displayName: 'pytest utils' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/xgboost --durations=0 - displayName: 'pytest xgboost' - - - script: | - call activate py$(python.version) - python -m pip install torch - python -m pip install hummingbird-ml --no-deps - displayName: 'Install hummingbird-ml' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/hummingbirdml --durations=0 - displayName: 'pytest hummingbirdml' - - - task: PublishTestResults@2 - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version)' - condition: succeededOrFailed() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..0b51164f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,144 @@ +name: CI +on: [push, pull_request] +jobs: + run: + name: ${{ matrix.os }} py==${{ matrix.python_version }} - sklearn${{ matrix.sklearn_version }} - ${{ matrix.onnxrt_version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python_version: ['3.12', '3.11', '3.10', '3.9'] + include: + - python_version: '3.12' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.16.0' + onnxrt_version: 'onnxruntime==1.18.0' + sklearn_version: '==1.4.2' + lgbm_version: ">=4" + xgboost_version: ">=2" + - python_version: '3.11' + documentation: 1 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx<1.16.0' + onnxrt_version: 'onnxruntime==1.17.3' + sklearn_version: '==1.4.2' + lgbm_version: ">=4" + xgboost_version: ">=2" + - python_version: '3.10' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.14.1' + onnxrt_version: 'onnxruntime==1.16.3' + sklearn_version: '==1.4.2' + lgbm_version: "<4" + xgboost_version: "<2" + - python_version: '3.9' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.13.0' + onnxrt_version: 'onnxruntime==1.14.1' + sklearn_version: '==1.4.2' + lgbm_version: "<4" + xgboost_version: "<2" + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + + - name: Install requirements + run: python -m pip install -r 
requirements.txt + + - name: Install requirements dev + run: python -m pip install -r requirements-dev.txt + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements-dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + + - name: Install specific versions + run: pip install "${{ matrix.onnx_version }}" "${{ matrix.onnxrt_version }}" "numpy${{ matrix.numpy_version }}" "scikit-learn${{ matrix.sklearn_version}}" "scipy${{ matrix.scipy_version }}" "xgboost${{ matrix.xgboost_version }}" + + - name: Install lightgbm + if: matrix.os != 'macos-latest' + run: pip install "lightgbm${{ matrix.lgbm_version }}" + + - name: Install + run: pip install -e . + + - name: versions + run: | + python -c "from numpy import __version__;print('numpy', __version__)" + python -c "from pandas import __version__;print('pandas', __version__)" + python -c "from scipy import __version__;print('scipy', __version__)" + python -c "from sklearn import __version__;print('sklearn', __version__)" + python -c "from onnxruntime import __version__;print('onnxruntime', __version__)" + python -c "from onnx import __version__;print('onnx', __version__)" + python -c "from xgboost import __version__;print('xgboost', __version__)" + python -c "from catboost import __version__;print('catboost', __version__)" + python -c "import onnx.defs;print('onnx_opset_version', onnx.defs.onnx_opset_version())" + + - name: versions lightgbm + if: matrix.os != 'macos-latest' + run: | + python -c "from lightgbm import __version__;print('lightgbm', __version__)" + + - name: Run tests baseline + run: pytest --maxfail=10 --durations=10 tests/baseline + + - name: Run tests utils + run: pytest --maxfail=10 --durations=10 tests/utils + + - name: Run tests catboost + run: pytest --maxfail=10 --durations=10 tests/catboost + + - name: Run tests lightgbm + if: matrix.os != 'macos-latest' + run: pytest --maxfail=10 --durations=10 tests/lightgbm + + - name: Run tests xgboost + run: pytest --maxfail=10 --durations=10 tests/xgboost + + - name: Run tests svmlib + run: pytest --maxfail=10 --durations=10 tests/svmlib + + - name: Run tests h2o + if: matrix.os == 'ubuntu-latest' + run: | + pip install h2o + pytest --maxfail=10 --durations=10 tests/h2o + + - name: Run tests pysparkml + if: matrix.os == 'ubuntu-latest' && matrix.python_version != '3.12' + run: pytest --maxfail=10 --durations=10 tests/sparkml + + - name: Run tests hummingbirdml + if: matrix.os != 'macos-latest' + run: | + pip install torch --extra-index-url https://download.pytorch.org/whl/cpu + pip install hummingbird-ml --no-deps + pytest --maxfail=10 --durations=10 tests/hummingbirdml + + - name: Run tests baseline + run: pytest --maxfail=10 --durations=10 tests/baseline + + - name: build + run: pip wheel . 
+ + - uses: actions/upload-artifact@v4 + with: + path: ./dist/** diff --git a/README.md b/README.md index 2caba11f..6f2965d9 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # -![ONNXMLTools_logo_main](docs/ONNXMLTools_logo_main.png) +![ONNXMLTools_logo_main](https://github.com/onnx/onnxmltools/blob/main/docs/ONNXMLTools_logo_main.png) -| Linux | Windows | -|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [![Build Status](https://dev.azure.com/onnxmltools/onnxmltools/_apis/build/status/onnxmltools-linux-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/onnxmltools/_build/latest?definitionId=3?branchName=master) | [![Build Status](https://dev.azure.com/onnxmltools/onnxmltools/_apis/build/status/onnxmltools-win32-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/onnxmltools/_build/latest?definitionId=3?branchName=master) | +[![CI](https://github.com/onnx/onnxmltools/actions/workflows/ci.yml/badge.svg)](https://github.com/onnx/onnxmltools/actions/workflows/ci.yml) ## Introduction @@ -58,7 +56,7 @@ ONNXMLTools is tested with Python **3.7+**. # Examples -If you want the converted ONNX model to be compatible with a certain ONNX version, please specify the target_opset parameter upon invoking the convert function. The following Keras model conversion example demonstrates this below. You can identify the mapping from ONNX Operator Sets (referred to as opsets) to ONNX releases in the [versioning documentation](https://github.com/onnx/onnx/blob/master/docs/Versioning.md#released-versions). +If you want the converted ONNX model to be compatible with a certain ONNX version, please specify the target_opset parameter upon invoking the convert function. The following Keras model conversion example demonstrates this below. You can identify the mapping from ONNX Operator Sets (referred to as opsets) to ONNX releases in the [versioning documentation](https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions). ## Keras to ONNX Conversion @@ -142,7 +140,7 @@ opset_version = onnx_model.opset_import[0].version If the result from checking your ONNX model's opset is smaller than the `target_opset` number you specified in the onnxmltools.convert function, be assured that this is likely intended behavior. The ONNXMLTools converter works by converting each operator to the ONNX format individually and finding the corresponding opset version that it was most recently updated in. Once all of the operators are converted, the resultant ONNX model has the maximal opset version of all of its operators. -To illustrate this concretely, let's consider a model with two operators, Abs and Add. As of December 2018, [Abs](https://github.com/onnx/onnx/blob/master/docs/Operators.md#abs) was most recently updated in opset 6, and [Add](https://github.com/onnx/onnx/blob/master/docs/Operators.md#add) was most recently updated in opset 7. Therefore, the converted ONNX model's opset will always be 7, even if you request `target_opset=8`. The converter behavior was defined this way to ensure backwards compatibility. 
+To illustrate this concretely, let's consider a model with two operators, Abs and Add. As of December 2018, [Abs](https://github.com/onnx/onnx/blob/main/docs/Operators.md#abs) was most recently updated in opset 6, and [Add](https://github.com/onnx/onnx/blob/main/docs/Operators.md#add) was most recently updated in opset 7. Therefore, the converted ONNX model's opset will always be 7, even if you request `target_opset=8`. The converter behavior was defined this way to ensure backwards compatibility. Documentation for the [ONNX Model format](https://github.com/onnx/onnx) and more examples for converting models from different frameworks can be found in the [ONNX tutorials](https://github.com/onnx/tutorials) repository. diff --git a/onnxmltools/convert/README.md b/onnxmltools/convert/README.md index ebc828cb..ae77ab10 100644 --- a/onnxmltools/convert/README.md +++ b/onnxmltools/convert/README.md @@ -81,7 +81,7 @@ The shape mapping from Core ML to our IR obeys the following rules. Notice that the compiler can overwrite those rules at some stages like shape inference. An example is the label shape of a classifier. One may expect that its shape is `[1, 1].` Nevertheless, our shape inference may change it to `[1]`. The major reason is that the current definition of ZipMap, the operator used to generate the predicted probabilities, does not support batch size greater than one. Core ML's batch size, `N-axis`, is ignored because it is not related to graph structures. In fact, ONNX's batch size is rather equivalent to sequence axis in Core ML. By default, we use `N=1` for traditional machine learning models and `N='None'` for neural networks. To overwrite our default types, user can provide `initial_types` when calling `convert(...)` defined in `onnxmltools.convert.coreml.convert.py`. All Core ML's shape calculations are derived from [this document](https://apple.github.io/coremltools/coremlspecification/index.html) specifically for our type system. -Some more details about Core ML neural network operator can be found at this [page](https://github.com/apple/coremltools/blob/master/mlmodel/format/NeuralNetwork.proto) +Some more details about Core ML neural network operator can be found at this [page](https://github.com/apple/coremltools/blob/main/mlmodel/format/NeuralNetwork.proto) For scikit-learn, user may need to specify the input types for their models. In general, we expect `[1, C]` if the input is feature vector. 
diff --git a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py index a31a7dc4..b4c9192e 100644 --- a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py +++ b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py @@ -28,7 +28,11 @@ def sparkml_tree_dataset_to_sklearn(tree_df, is_classifier): if isinstance(item, dict): try: feature.append(item["featureIndex"]) - threshold.append(item["leftCategoriesOrThreshold"][0] if len(item["leftCategoriesOrThreshold"]) >= 1 else -1.0) + threshold.append( + item["leftCategoriesOrThreshold"][0] + if len(item["leftCategoriesOrThreshold"]) >= 1 + else -1.0 + ) except KeyError: raise RuntimeError(f"Unable to process {item}.") else: diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py index d241f186..81185428 100644 --- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py +++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py @@ -120,10 +120,10 @@ def _add_node( # Split condition for sklearn # * if X_ptr[X_sample_stride * i + X_fx_stride * node.feature] <= node.threshold: - # * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tree/_tree.pyx#L946 + # * https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tree/_tree.pyx#L946 # Split condition for xgboost # * if (fvalue < split_value) - # * https://github.com/dmlc/xgboost/blob/master/include/xgboost/tree_model.h#L804 + # * https://github.com/dmlc/xgboost/blob/main/include/xgboost/tree_model.h#L804 attr_pairs["nodes_treeids"].append(tree_id) attr_pairs["nodes_nodeids"].append(node_id) @@ -332,7 +332,7 @@ def convert(scope, operator, container): if ncl <= 1: ncl = 2 if objective != "binary:hinge": - # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L23. + # See https://github.com/dmlc/xgboost/blob/main/src/common/math.h#L23. attr_pairs["post_transform"] = "LOGISTIC" attr_pairs["class_ids"] = [0 for v in attr_pairs["class_treeids"]] if js_trees[0].get("leaf", None) == 0: @@ -344,7 +344,7 @@ def convert(scope, operator, container): else: attr_pairs["base_values"] = [base_score] else: - # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L35. + # See https://github.com/dmlc/xgboost/blob/main/src/common/math.h#L35. 
attr_pairs["post_transform"] = "SOFTMAX" attr_pairs["base_values"] = [base_score for n in range(ncl)] attr_pairs["class_ids"] = [v % ncl for v in attr_pairs["class_treeids"]] diff --git a/onnxmltools/proto/__init__.py b/onnxmltools/proto/__init__.py index a6bd5faf..e84b6419 100644 --- a/onnxmltools/proto/__init__.py +++ b/onnxmltools/proto/__init__.py @@ -15,13 +15,9 @@ def _check_onnx_version(): - import pkg_resources + from onnx import __version__ - min_required_version = pkg_resources.parse_version("1.0.1") - current_version = pkg_resources.get_distribution("onnx").parsed_version - assert ( - current_version >= min_required_version - ), "ONNXMLTools requires ONNX version 1.0.1 or a newer one" + return not __version__.startswith("0.") _check_onnx_version() diff --git a/onnxmltools/utils/tests_helper.py b/onnxmltools/utils/tests_helper.py index 7b5fd1d4..78d8eb81 100644 --- a/onnxmltools/utils/tests_helper.py +++ b/onnxmltools/utils/tests_helper.py @@ -88,10 +88,16 @@ def dump_data_and_model( os.makedirs(folder) if hasattr(model, "predict"): - import lightgbm - import xgboost - - if isinstance(model, lightgbm.Booster): + try: + import lightgbm + except ImportError: + lightgbm = None + try: + import xgboost + except ImportError: + xgboost = None + + if lightgbm is not None and isinstance(model, lightgbm.Booster): # LightGBM Booster model_dict = model.dump_model() if model_dict["objective"].startswith("binary"): @@ -105,7 +111,7 @@ def dump_data_and_model( prediction = [score.argmax(axis=1), score] else: prediction = [model.predict(data)] - elif isinstance(model, xgboost.Booster): + elif xgboost is not None and isinstance(model, xgboost.Booster): # XGBoost Booster from ..convert.xgboost._parse import _get_attributes from xgboost import DMatrix diff --git a/requirements-dev.txt b/requirements-dev.txt index 1ba960b8..1fe484ec 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,13 +3,13 @@ catboost cython dill libsvm -lightgbm +lightgbm; sys_platform != 'darwin' mleap numpy openpyxl onnxconverter-common pandas -pyspark +pyspark; sys_platform == 'linux' pytest pytest-cov pytest-spark diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py index ab33e845..c364257a 100644 --- a/tests/xgboost/test_xgboost_converters.py +++ b/tests/xgboost/test_xgboost_converters.py @@ -173,7 +173,7 @@ def test_xgb_classifier_reglog(self): ) dump_data_and_model( - x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog" + x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog-Dec4" ) def test_xgb_classifier_multi_discrete_int_labels(self):