diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml deleted file mode 100644 index b45760ba..00000000 --- a/.azure-pipelines/linux-conda-CI.yml +++ /dev/null @@ -1,193 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: -- master - -jobs: - -- job: 'Test' - timeoutInMinutes: 30 - pool: - vmImage: 'ubuntu-latest' - strategy: - matrix: - - Python311-1150-RT1163-xgb2-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=2' - numpy.version: '' - scipy.version: '' - - Python311-1150-RT1163-xgb175-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python311-1141-RT1162-xgb175-lgbm40: - python.version: '3.11' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python310-1141-RT1151-xgb175: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - lightgbm.version: '<4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python310-1141-RT1140-xgb175: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.14.0' - COREML_PATH: NONE - lightgbm.version: '<4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '' - - Python39-1141-RT1151-xgb175-scipy180: - python.version: '3.9' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - lightgbm.version: '>=4.0' - xgboost.version: '>=1.7.5,<2' - numpy.version: '' - scipy.version: '==1.8.0' - - - maxParallel: 3 - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - architecture: 'x64' - - - script: | - python -m pip install --upgrade pip - pip install $(ONNX_PATH) $(ONNXRT_PATH) cython - pip install -r requirements.txt - displayName: 'Install dependencies' - - - script: | - pip install -r requirements-dev.txt - displayName: 'Install dependencies-dev' - - - script: | - python -m pip install --upgrade pip - pip install "xgboost$(xgboost.version)" - pip install "lightgbm$(lightgbm.version)" - pip install $(ONNX_PATH) - pip install $(ONNXRT_PATH) - pip install "numpy$(numpy.version)" - pip install "scipy$(scipy.version)" - displayName: 'Install xgboost, onnxruntime' - - - script: | - python -m pip install coloredlogs flatbuffers packaging sympy numpy protobuf - python -m pip install $(ONNXRT_PATH) - displayName: 'Install onnxruntime' - - - script: | - pip install -e . - displayName: 'local installation' - - - script: | - export PYTHONPATH=. - python -c "import onnxruntime;print('onnx:',onnx.__version__)" - python -c "import onnxconverter_common;print('cc:',onnxconverter_common.__version__)" - python -c "import onnx;print('onnx:',onnx.__version__)" - python -c "import onnxruntime;print('ort:',onnxruntime.__version__)" - python -c "import xgboost;print('xgboost:',xgboost.__version__)" - python -c "import lightgbm;print('lightgbm:',lightgbm.__version__)" - displayName: 'version' - - - script: | - export PYTHONPATH=. - pytest tests/baseline --durations=0 - displayName: 'pytest - baseline' - - - script: | - export PYTHONPATH=. - pytest tests/catboost --durations=0 - displayName: 'pytest - catboost' - - - script: | - export PYTHONPATH=. - pytest tests/lightgbm --durations=0 - displayName: 'pytest - lightgbm' - - - script: | - export PYTHONPATH=. - pytest tests/sparkml --durations=0 - displayName: 'pytest - sparkml' - - - script: | - export PYTHONPATH=. - pytest tests/utils --durations=0 - displayName: 'pytest - utils' - - - script: | - export PYTHONPATH=. - pytest tests/xgboost --durations=0 - displayName: 'pytest - xgboost' - - - script: | - export PYTHONPATH=. - pip install h2o - pytest tests/h2o --durations=0 - displayName: 'pytest - h2o' - - - script: | - export PYTHONPATH=. - pytest tests/svmlib --durations=0 - displayName: 'pytest - svmlib' - - - script: | - pip install torch --extra-index-url https://download.pytorch.org/whl/cpu - pip install hummingbird-ml --no-deps - displayName: 'Install hummingbird-ml' - - - script: | - export PYTHONPATH=. - pytest tests/hummingbirdml --durations=0 - displayName: 'pytest - hummingbirdml' - - - script: | - if [ '$(COREML_PATH)' == 'NONE' ] - then - echo "required version of coremltools does not work on python 3.10" - else - export PYTHONPATH=. - pip install $(COREML_PATH) - pytest tests/coreml --durations=0 - fi - displayName: 'pytest - coreml [$(COREML_PATH)]' - # condition: ne('$(COREML_PATH)', 'NONE') - - - task: PublishTestResults@2 - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version)' - condition: succeededOrFailed() diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml deleted file mode 100644 index f3a42ba5..00000000 --- a/.azure-pipelines/win32-conda-CI.yml +++ /dev/null @@ -1,184 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: -- master - -jobs: - -- job: 'Test' - timeoutInMinutes: 30 - pool: - vmImage: 'windows-latest' - strategy: - matrix: - - Python311-1150-RT1163: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.3' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '2.0.2' - - Python311-1150-RT1162: - python.version: '3.11' - ONNX_PATH: 'onnx==1.15.0' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '2.0.2' - - Python311-1141-RT1162: - python.version: '3.11' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.16.2' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '1.7.5' - - Python310-1141-RT1151: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: 'onnxruntime==1.15.1' - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '1.7.5' - - Python310-1141-RT1140: - python.version: '3.10' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: onnxruntime==1.14.0 - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '1.7.5' - - Python39-1141-RT1140: - python.version: '3.9' - ONNX_PATH: 'onnx==1.14.1' - ONNXRT_PATH: onnxruntime==1.14.0 - COREML_PATH: NONE - numpy.version: '' - xgboost.version: '1.7.5' - - maxParallel: 3 - - steps: - - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Add conda to PATH - - - script: conda create --yes --quiet --name py$(python.version) -c conda-forge python=$(python.version) numpy protobuf scikit-learn scipy cython - displayName: Create Anaconda environment - - - script: | - call activate py$(python.version) - python -m pip install --upgrade pip numpy - echo Test numpy installation... && python -c "import numpy" - python -m pip install -r requirements.txt - displayName: 'Install dependencies (1)' - - - script: | - call activate py$(python.version) - python -m pip install -r requirements-dev.txt - displayName: 'Install dependencies-dev' - - - script: | - call activate py$(python.version) - python -m pip install --upgrade scikit-learn - python -m pip install --upgrade lightgbm - python -m pip install "xgboost==$(xgboost.version)" - displayName: 'Install scikit-learn' - - - script: | - call activate py$(python.version) - python -m pip install %ONNX_PATH% - python -m pip install %ONNXRT_PATH% - python -m pip install "numpy$(numpy.version)" - displayName: 'Install onnxruntime' - - - script: | - call activate py$(python.version) - python -m pip install coloredlogs flatbuffers packaging sympy numpy protobuf - python -m pip install $(ONNXRT_PATH) - displayName: 'Install ort-nightly' - - - script: | - call activate py$(python.version) - python -m pip install -e . - export PYTHONPATH=. - python -c "import onnxconverter_common;print(onnxconverter_common.__version__)" - python -c "import onnx;print(onnx.__version__)" - python -c "import onnxruntime;print(onnxruntime.__version__)" - python -c "import xgboost;print(xgboost.__version__)" - python -c "import lightgbm;print(lightgbm.__version__)" - displayName: 'version' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/baseline --durations=0 - displayName: 'pytest baseline' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/catboost --durations=0 - displayName: 'pytest catboost' - - - script: | - call activate py$(python.version) - set PYTHONPATH=. - if "$(COREML_PATH)" neq "NONE" python -m pip install %COREML_PATH% - if "$(COREML_PATH)" neq "NONE" python -m pytest tests/coreml --durations=0 - displayName: 'pytest coreml - [$(COREML_PATH)]' - #condition: ne('$(COREML_PATH)', 'NONE') - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/lightgbm --durations=0 - displayName: 'pytest lightgbm' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/sparkml --durations=0 - displayName: 'pytest sparkml' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/svmlib --durations=0 - displayName: 'pytest svmlib' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/utils --durations=0 - displayName: 'pytest utils' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/xgboost --durations=0 - displayName: 'pytest xgboost' - - - script: | - call activate py$(python.version) - python -m pip install torch - python -m pip install hummingbird-ml --no-deps - displayName: 'Install hummingbird-ml' - - - script: | - call activate py$(python.version) - export PYTHONPATH=. - python -m pytest tests/hummingbirdml --durations=0 - displayName: 'pytest hummingbirdml' - - - task: PublishTestResults@2 - inputs: - testResultsFiles: '**/test-results.xml' - testRunTitle: 'Python $(python.version)' - condition: succeededOrFailed() diff --git a/.github/workflows/black-ruff.yml b/.github/workflows/black-ruff.yml index 09da3fc3..dc846df3 100644 --- a/.github/workflows/black-ruff.yml +++ b/.github/workflows/black-ruff.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@193ee766ca496871f93621d6b58d57a6564ff81b # stable 23.7.0 + - uses: psf/black@e42f1ffbe159018c8e887f775c0fd7b51b3cf787 # stable 24.3.0 with: options: "--diff --check" src: "." diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..0b51164f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,144 @@ +name: CI +on: [push, pull_request] +jobs: + run: + name: ${{ matrix.os }} py==${{ matrix.python_version }} - sklearn${{ matrix.sklearn_version }} - ${{ matrix.onnxrt_version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python_version: ['3.12', '3.11', '3.10', '3.9'] + include: + - python_version: '3.12' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.16.0' + onnxrt_version: 'onnxruntime==1.18.0' + sklearn_version: '==1.4.2' + lgbm_version: ">=4" + xgboost_version: ">=2" + - python_version: '3.11' + documentation: 1 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx<1.16.0' + onnxrt_version: 'onnxruntime==1.17.3' + sklearn_version: '==1.4.2' + lgbm_version: ">=4" + xgboost_version: ">=2" + - python_version: '3.10' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.14.1' + onnxrt_version: 'onnxruntime==1.16.3' + sklearn_version: '==1.4.2' + lgbm_version: "<4" + xgboost_version: "<2" + - python_version: '3.9' + documentation: 0 + numpy_version: '>=1.21.1' + scipy_version: '>=1.7.0' + onnx_version: 'onnx==1.13.0' + onnxrt_version: 'onnxruntime==1.14.1' + sklearn_version: '==1.4.2' + lgbm_version: "<4" + xgboost_version: "<2" + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + + - name: Install requirements + run: python -m pip install -r requirements.txt + + - name: Install requirements dev + run: python -m pip install -r requirements-dev.txt + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements-dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + + - name: Install specific versions + run: pip install "${{ matrix.onnx_version }}" "${{ matrix.onnxrt_version }}" "numpy${{ matrix.numpy_version }}" "scikit-learn${{ matrix.sklearn_version}}" "scipy${{ matrix.scipy_version }}" "xgboost${{ matrix.xgboost_version }}" + + - name: Install lightgbm + if: matrix.os != 'macos-latest' + run: pip install "lightgbm${{ matrix.lgbm_version }}" + + - name: Install + run: pip install -e . + + - name: versions + run: | + python -c "from numpy import __version__;print('numpy', __version__)" + python -c "from pandas import __version__;print('pandas', __version__)" + python -c "from scipy import __version__;print('scipy', __version__)" + python -c "from sklearn import __version__;print('sklearn', __version__)" + python -c "from onnxruntime import __version__;print('onnxruntime', __version__)" + python -c "from onnx import __version__;print('onnx', __version__)" + python -c "from xgboost import __version__;print('xgboost', __version__)" + python -c "from catboost import __version__;print('catboost', __version__)" + python -c "import onnx.defs;print('onnx_opset_version', onnx.defs.onnx_opset_version())" + + - name: versions lightgbm + if: matrix.os != 'macos-latest' + run: | + python -c "from lightgbm import __version__;print('lightgbm', __version__)" + + - name: Run tests baseline + run: pytest --maxfail=10 --durations=10 tests/baseline + + - name: Run tests utils + run: pytest --maxfail=10 --durations=10 tests/utils + + - name: Run tests catboost + run: pytest --maxfail=10 --durations=10 tests/catboost + + - name: Run tests lightgbm + if: matrix.os != 'macos-latest' + run: pytest --maxfail=10 --durations=10 tests/lightgbm + + - name: Run tests xgboost + run: pytest --maxfail=10 --durations=10 tests/xgboost + + - name: Run tests svmlib + run: pytest --maxfail=10 --durations=10 tests/svmlib + + - name: Run tests h2o + if: matrix.os == 'ubuntu-latest' + run: | + pip install h2o + pytest --maxfail=10 --durations=10 tests/h2o + + - name: Run tests pysparkml + if: matrix.os == 'ubuntu-latest' && matrix.python_version != '3.12' + run: pytest --maxfail=10 --durations=10 tests/sparkml + + - name: Run tests hummingbirdml + if: matrix.os != 'macos-latest' + run: | + pip install torch --extra-index-url https://download.pytorch.org/whl/cpu + pip install hummingbird-ml --no-deps + pytest --maxfail=10 --durations=10 tests/hummingbirdml + + - name: Run tests baseline + run: pytest --maxfail=10 --durations=10 tests/baseline + + - name: build + run: pip wheel . + + - uses: actions/upload-artifact@v4 + with: + path: ./dist/** diff --git a/CHANGELOGS.md b/CHANGELOGS.md index 1fe94578..2920b296 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -1,5 +1,14 @@ # Change Logs +## 1.13.0 (development) + +* Handle issue with binary classifier setting output to [N,1] vs [N,2], + [#681](https://github.com/onnx/onnxmltools/pull/681) +* Add missing dependency onnxconverter_common, fix multi regression with xgboost, + [#679](https://github.com/onnx/onnxmltools/pull/679), + fixes issues [No module named 'onnxconverter_common'](https://github.com/onnx/onnxmltools/issues/673), + [onnx converted : xgboostRegressor multioutput model predicts 1 dimension instead of original 210 dimensions.](https://github.com/onnx/onnxmltools/issues/676) + ## 1.12.0 * Fix early stopping for XGBClassifier and xgboost > 2 diff --git a/README.md b/README.md index 2caba11f..6f2965d9 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # -![ONNXMLTools_logo_main](docs/ONNXMLTools_logo_main.png) +![ONNXMLTools_logo_main](https://github.com/onnx/onnxmltools/blob/main/docs/ONNXMLTools_logo_main.png) -| Linux | Windows | -|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [![Build Status](https://dev.azure.com/onnxmltools/onnxmltools/_apis/build/status/onnxmltools-linux-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/onnxmltools/_build/latest?definitionId=3?branchName=master) | [![Build Status](https://dev.azure.com/onnxmltools/onnxmltools/_apis/build/status/onnxmltools-win32-conda-ci?branchName=master)](https://dev.azure.com/onnxmltools/onnxmltools/_build/latest?definitionId=3?branchName=master) | +[![CI](https://github.com/onnx/onnxmltools/actions/workflows/ci.yml/badge.svg)](https://github.com/onnx/onnxmltools/actions/workflows/ci.yml) ## Introduction @@ -58,7 +56,7 @@ ONNXMLTools is tested with Python **3.7+**. # Examples -If you want the converted ONNX model to be compatible with a certain ONNX version, please specify the target_opset parameter upon invoking the convert function. The following Keras model conversion example demonstrates this below. You can identify the mapping from ONNX Operator Sets (referred to as opsets) to ONNX releases in the [versioning documentation](https://github.com/onnx/onnx/blob/master/docs/Versioning.md#released-versions). +If you want the converted ONNX model to be compatible with a certain ONNX version, please specify the target_opset parameter upon invoking the convert function. The following Keras model conversion example demonstrates this below. You can identify the mapping from ONNX Operator Sets (referred to as opsets) to ONNX releases in the [versioning documentation](https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions). ## Keras to ONNX Conversion @@ -142,7 +140,7 @@ opset_version = onnx_model.opset_import[0].version If the result from checking your ONNX model's opset is smaller than the `target_opset` number you specified in the onnxmltools.convert function, be assured that this is likely intended behavior. The ONNXMLTools converter works by converting each operator to the ONNX format individually and finding the corresponding opset version that it was most recently updated in. Once all of the operators are converted, the resultant ONNX model has the maximal opset version of all of its operators. -To illustrate this concretely, let's consider a model with two operators, Abs and Add. As of December 2018, [Abs](https://github.com/onnx/onnx/blob/master/docs/Operators.md#abs) was most recently updated in opset 6, and [Add](https://github.com/onnx/onnx/blob/master/docs/Operators.md#add) was most recently updated in opset 7. Therefore, the converted ONNX model's opset will always be 7, even if you request `target_opset=8`. The converter behavior was defined this way to ensure backwards compatibility. +To illustrate this concretely, let's consider a model with two operators, Abs and Add. As of December 2018, [Abs](https://github.com/onnx/onnx/blob/main/docs/Operators.md#abs) was most recently updated in opset 6, and [Add](https://github.com/onnx/onnx/blob/main/docs/Operators.md#add) was most recently updated in opset 7. Therefore, the converted ONNX model's opset will always be 7, even if you request `target_opset=8`. The converter behavior was defined this way to ensure backwards compatibility. Documentation for the [ONNX Model format](https://github.com/onnx/onnx) and more examples for converting models from different frameworks can be found in the [ONNX tutorials](https://github.com/onnx/tutorials) repository. diff --git a/onnxmltools/__init__.py b/onnxmltools/__init__.py index 7015cd04..b898fe11 100644 --- a/onnxmltools/__init__.py +++ b/onnxmltools/__init__.py @@ -5,7 +5,7 @@ This framework converts any machine learned model into onnx format which is a common language to describe any machine learned model. """ -__version__ = "1.12.0" +__version__ = "1.13.0" __author__ = "ONNX" __producer__ = "OnnxMLTools" __producer_version__ = __version__ diff --git a/onnxmltools/convert/README.md b/onnxmltools/convert/README.md index ebc828cb..ae77ab10 100644 --- a/onnxmltools/convert/README.md +++ b/onnxmltools/convert/README.md @@ -81,7 +81,7 @@ The shape mapping from Core ML to our IR obeys the following rules. Notice that the compiler can overwrite those rules at some stages like shape inference. An example is the label shape of a classifier. One may expect that its shape is `[1, 1].` Nevertheless, our shape inference may change it to `[1]`. The major reason is that the current definition of ZipMap, the operator used to generate the predicted probabilities, does not support batch size greater than one. Core ML's batch size, `N-axis`, is ignored because it is not related to graph structures. In fact, ONNX's batch size is rather equivalent to sequence axis in Core ML. By default, we use `N=1` for traditional machine learning models and `N='None'` for neural networks. To overwrite our default types, user can provide `initial_types` when calling `convert(...)` defined in `onnxmltools.convert.coreml.convert.py`. All Core ML's shape calculations are derived from [this document](https://apple.github.io/coremltools/coremlspecification/index.html) specifically for our type system. -Some more details about Core ML neural network operator can be found at this [page](https://github.com/apple/coremltools/blob/master/mlmodel/format/NeuralNetwork.proto) +Some more details about Core ML neural network operator can be found at this [page](https://github.com/apple/coremltools/blob/main/mlmodel/format/NeuralNetwork.proto) For scikit-learn, user may need to specify the input types for their models. In general, we expect `[1, C]` if the input is feature vector. diff --git a/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py b/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py index db52cc01..6dda3fbf 100644 --- a/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py +++ b/onnxmltools/convert/coreml/operator_converters/TreeEnsemble.py @@ -45,9 +45,9 @@ def convert_tree_ensemble_model(scope, operator, container): op_type = "TreeEnsembleClassifier" prefix = "class" nodes = raw_model.treeEnsembleClassifier.treeEnsemble.nodes - attrs[ - "base_values" - ] = raw_model.treeEnsembleClassifier.treeEnsemble.basePredictionValue + attrs["base_values"] = ( + raw_model.treeEnsembleClassifier.treeEnsemble.basePredictionValue + ) attrs["post_transform"] = get_onnx_tree_post_transform( raw_model.treeEnsembleClassifier.postEvaluationTransform ) @@ -72,12 +72,12 @@ def convert_tree_ensemble_model(scope, operator, container): op_type = "TreeEnsembleRegressor" prefix = "target" nodes = raw_model.treeEnsembleRegressor.treeEnsemble.nodes - attrs[ - "base_values" - ] = raw_model.treeEnsembleRegressor.treeEnsemble.basePredictionValue - attrs[ - "n_targets" - ] = raw_model.treeEnsembleRegressor.treeEnsemble.numPredictionDimensions + attrs["base_values"] = ( + raw_model.treeEnsembleRegressor.treeEnsemble.basePredictionValue + ) + attrs["n_targets"] = ( + raw_model.treeEnsembleRegressor.treeEnsemble.numPredictionDimensions + ) attrs["post_transform"] = get_onnx_tree_post_transform( raw_model.treeEnsembleRegressor.postEvaluationTransform ) diff --git a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py index 2e19c7c3..b4c9192e 100644 --- a/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py +++ b/onnxmltools/convert/sparkml/operator_converters/tree_ensemble_common.py @@ -28,7 +28,11 @@ def sparkml_tree_dataset_to_sklearn(tree_df, is_classifier): if isinstance(item, dict): try: feature.append(item["featureIndex"]) - threshold.append(item["leftCategoriesOrThreshold"]) + threshold.append( + item["leftCategoriesOrThreshold"][0] + if len(item["leftCategoriesOrThreshold"]) >= 1 + else -1.0 + ) except KeyError: raise RuntimeError(f"Unable to process {item}.") else: diff --git a/onnxmltools/convert/sparkml/ops_input_output.py b/onnxmltools/convert/sparkml/ops_input_output.py index ae241667..d74f95dd 100644 --- a/onnxmltools/convert/sparkml/ops_input_output.py +++ b/onnxmltools/convert/sparkml/ops_input_output.py @@ -134,12 +134,16 @@ def build_io_name_map(): lambda model: [model.getOrDefault("predictionCol")], ), "pyspark.ml.feature.ImputerModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.MaxAbsScalerModel": ( lambda model: [model.getOrDefault("inputCol")], @@ -177,20 +181,28 @@ def build_io_name_map(): ], ), "pyspark.ml.feature.OneHotEncoderModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.StringIndexerModel": ( - lambda model: model.getOrDefault("inputCols") - if model.isSet("inputCols") - else [model.getOrDefault("inputCol")], - lambda model: model.getOrDefault("outputCols") - if model.isSet("outputCols") - else [model.getOrDefault("outputCol")], + lambda model: ( + model.getOrDefault("inputCols") + if model.isSet("inputCols") + else [model.getOrDefault("inputCol")] + ), + lambda model: ( + model.getOrDefault("outputCols") + if model.isSet("outputCols") + else [model.getOrDefault("outputCol")] + ), ), "pyspark.ml.feature.VectorAssembler": ( lambda model: model.getOrDefault("inputCols"), diff --git a/onnxmltools/convert/xgboost/common.py b/onnxmltools/convert/xgboost/common.py index 3a79aaad..4a10a48f 100644 --- a/onnxmltools/convert/xgboost/common.py +++ b/onnxmltools/convert/xgboost/common.py @@ -32,6 +32,12 @@ def get_xgb_params(xgb_node): bs = float(config["learner"]["learner_model_param"]["base_score"]) # xgboost >= 2.0 params["base_score"] = bs + if "num_target" in config["learner"]["learner_model_param"]: + params["n_targets"] = int( + config["learner"]["learner_model_param"]["num_target"] + ) + else: + params["n_targets"] = 1 bst = xgb_node.get_booster() if hasattr(bst, "best_ntree_limit"): diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py index 904630b1..81185428 100644 --- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py +++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py @@ -120,10 +120,10 @@ def _add_node( # Split condition for sklearn # * if X_ptr[X_sample_stride * i + X_fx_stride * node.feature] <= node.threshold: - # * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tree/_tree.pyx#L946 + # * https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/tree/_tree.pyx#L946 # Split condition for xgboost # * if (fvalue < split_value) - # * https://github.com/dmlc/xgboost/blob/master/include/xgboost/tree_model.h#L804 + # * https://github.com/dmlc/xgboost/blob/main/include/xgboost/tree_model.h#L804 attr_pairs["nodes_treeids"].append(tree_id) attr_pairs["nodes_nodeids"].append(node_id) @@ -254,6 +254,9 @@ def convert(scope, operator, container): js_trees, attr_pairs, [1 for _ in js_trees], False ) + params = XGBConverter.get_xgb_params(xgb_node) + attr_pairs["n_targets"] = params["n_targets"] + # add nodes if objective == "count:poisson": names = [scope.get_unique_variable_name("tree")] @@ -329,7 +332,7 @@ def convert(scope, operator, container): if ncl <= 1: ncl = 2 if objective != "binary:hinge": - # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L23. + # See https://github.com/dmlc/xgboost/blob/main/src/common/math.h#L23. attr_pairs["post_transform"] = "LOGISTIC" attr_pairs["class_ids"] = [0 for v in attr_pairs["class_treeids"]] if js_trees[0].get("leaf", None) == 0: @@ -341,7 +344,7 @@ def convert(scope, operator, container): else: attr_pairs["base_values"] = [base_score] else: - # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L35. + # See https://github.com/dmlc/xgboost/blob/main/src/common/math.h#L35. attr_pairs["post_transform"] = "SOFTMAX" attr_pairs["base_values"] = [base_score for n in range(ncl)] attr_pairs["class_ids"] = [v % ncl for v in attr_pairs["class_treeids"]] diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py index e44d5a52..c245633f 100644 --- a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py +++ b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py @@ -28,11 +28,11 @@ def calculate_xgboost_classifier_output_shapes(operator): n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees) num_class = params.get("num_class", None) - if num_class is not None: + if objective == "binary:logistic": + ncl = 2 + elif num_class is not None: ncl = num_class n_estimators = ntrees // ncl - elif objective == "binary:logistic": - ncl = 2 else: ncl = ntrees // n_estimators if objective == "reg:logistic" and ncl == 1: @@ -46,7 +46,7 @@ def calculate_xgboost_classifier_output_shapes(operator): operator.outputs[0].type = Int64TensorType(shape=[N]) else: operator.outputs[0].type = StringTensorType(shape=[N]) - operator.outputs[1].type = operator.outputs[1].type = FloatTensorType([N, ncl]) + operator.outputs[1].type = FloatTensorType([N, ncl]) register_shape_calculator("XGBClassifier", calculate_xgboost_classifier_output_shapes) diff --git a/onnxmltools/proto/__init__.py b/onnxmltools/proto/__init__.py index a6bd5faf..e84b6419 100644 --- a/onnxmltools/proto/__init__.py +++ b/onnxmltools/proto/__init__.py @@ -15,13 +15,9 @@ def _check_onnx_version(): - import pkg_resources + from onnx import __version__ - min_required_version = pkg_resources.parse_version("1.0.1") - current_version = pkg_resources.get_distribution("onnx").parsed_version - assert ( - current_version >= min_required_version - ), "ONNXMLTools requires ONNX version 1.0.1 or a newer one" + return not __version__.startswith("0.") _check_onnx_version() diff --git a/onnxmltools/utils/tests_helper.py b/onnxmltools/utils/tests_helper.py index 7b5fd1d4..78d8eb81 100644 --- a/onnxmltools/utils/tests_helper.py +++ b/onnxmltools/utils/tests_helper.py @@ -88,10 +88,16 @@ def dump_data_and_model( os.makedirs(folder) if hasattr(model, "predict"): - import lightgbm - import xgboost - - if isinstance(model, lightgbm.Booster): + try: + import lightgbm + except ImportError: + lightgbm = None + try: + import xgboost + except ImportError: + xgboost = None + + if lightgbm is not None and isinstance(model, lightgbm.Booster): # LightGBM Booster model_dict = model.dump_model() if model_dict["objective"].startswith("binary"): @@ -105,7 +111,7 @@ def dump_data_and_model( prediction = [score.argmax(axis=1), score] else: prediction = [model.predict(data)] - elif isinstance(model, xgboost.Booster): + elif xgboost is not None and isinstance(model, xgboost.Booster): # XGBoost Booster from ..convert.xgboost._parse import _get_attributes from xgboost import DMatrix diff --git a/pyproject.toml b/pyproject.toml index 79f48d06..ae0a2b72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,10 +10,10 @@ exclude = [ # Same as Black. line-length = 95 -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 10 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "**/__init__.py" = ["F401"] "onnxmltools/convert/coreml/operator_converters/GLMClassifier.py" = ["E501"] "onnxmltools/convert/coreml/operator_converters/SVC.py" = ["E501"] diff --git a/requirements-dev.txt b/requirements-dev.txt index cfd46ddc..1fe484ec 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,12 +3,13 @@ catboost cython dill libsvm -lightgbm +lightgbm; sys_platform != 'darwin' mleap numpy openpyxl +onnxconverter-common pandas -pyspark +pyspark; sys_platform == 'linux' pytest pytest-cov pytest-spark diff --git a/requirements.txt b/requirements.txt index 308201c6..b1b42f87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy onnx +onnxconverter-common diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py index ab33e845..c364257a 100644 --- a/tests/xgboost/test_xgboost_converters.py +++ b/tests/xgboost/test_xgboost_converters.py @@ -173,7 +173,7 @@ def test_xgb_classifier_reglog(self): ) dump_data_and_model( - x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog" + x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog-Dec4" ) def test_xgb_classifier_multi_discrete_int_labels(self): diff --git a/tests/xgboost/test_xgboost_converters_base_score.py b/tests/xgboost/test_xgboost_converters_base_score.py index 27d40172..528560d8 100644 --- a/tests/xgboost/test_xgboost_converters_base_score.py +++ b/tests/xgboost/test_xgboost_converters_base_score.py @@ -103,7 +103,7 @@ def test_xgbclassifier_sparse_base_score(self): assert_almost_equal(expected.reshape((-1, 2)), got, decimal=4) def test_xgbclassifier_sparse_no_base_score(self): - X, y = make_regression(n_samples=200, n_features=10, random_state=0) + X, y = make_regression(n_samples=400, n_features=10, random_state=0) mask = np.random.randint(0, 50, size=(X.shape)) != 0 X[mask] = 0 y = (y + mask.sum(axis=1, keepdims=0)).astype(np.float32) diff --git a/tests/xgboost/test_xgboost_issues.py b/tests/xgboost/test_xgboost_issues.py new file mode 100644 index 00000000..47a2e08b --- /dev/null +++ b/tests/xgboost/test_xgboost_issues.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: Apache-2.0 + +import unittest + + +class TestXGBoostIssues(unittest.TestCase): + def test_issue_676(self): + import json + import onnxruntime + import xgboost + import numpy as np + from skl2onnx import convert_sklearn + from skl2onnx.common.data_types import FloatTensorType + from skl2onnx import update_registered_converter + from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( + convert_xgboost, + ) + + def xgbregressor_shape_calculator(operator): + config = json.loads(operator.raw_operator.get_booster().save_config()) + n_targets = int(config["learner"]["learner_model_param"]["num_target"]) + operator.outputs[0].type.shape = [None, n_targets] + + update_registered_converter( + xgboost.XGBRegressor, + "XGBoostXGBRegressor", + xgbregressor_shape_calculator, + convert_xgboost, + ) + # Your data and labels + X = np.random.rand(100, 10) + y = np.random.rand(100, 2) + + # Train XGBoost regressor + model = xgboost.XGBRegressor( + objective="reg:squarederror", n_estimators=2, maxdepth=2 + ) + model.fit(X, y) + + # Define input type (adjust shape according to your input) + initial_type = [("float_input", FloatTensorType([None, X.shape[1]]))] + + # Convert XGBoost model to ONNX + onnx_model = convert_sklearn( + model, initial_types=initial_type, target_opset={"": 12, "ai.onnx.ml": 3} + ) + self.assertIn("dim_value: 2", str(onnx_model.graph.output)) + + sess = onnxruntime.InferenceSession( + onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + got = sess.run(None, {"float_input": X.astype(np.float32)}) + self.assertEqual(got[0].shape, (100, 2)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/xgboost/test_xgboost_pipeline.py b/tests/xgboost/test_xgboost_pipeline.py index ffc7bfc2..1df296f6 100644 --- a/tests/xgboost/test_xgboost_pipeline.py +++ b/tests/xgboost/test_xgboost_pipeline.py @@ -50,7 +50,7 @@ def transformer_for_column(column): if column.dtype in ["bool"]: return "passthrough" if column.dtype in ["O"]: - return OneHotEncoder(sparse=False) + return OneHotEncoder(sparse_output=False) raise ValueError() return ColumnTransformer(