From df53ecf8074200aaf602658ba87b0e8e21bfdb0e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 7 Nov 2019 17:00:02 +0100
Subject: [PATCH] :bug: Add target_column to predict_proba_arff and include the
parameter in unit test.
---
docs/source/releases.rst | 5 +++++
gama/GamaClassifier.py | 22 +++++++++++++++-------
tests/system/test_gamaclassifier.py | 7 ++++---
3 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/docs/source/releases.rst b/docs/source/releases.rst
index c26b7c44..66003aec 100644
--- a/docs/source/releases.rst
+++ b/docs/source/releases.rst
@@ -1,6 +1,11 @@
Release Notes
=============
+Version 19.11.2
+---------------
+Bugfixes:
+ - `predict_proba_arff` now also accepts a `target_column` as expected from the previous update.
+
Version 19.11.1
---------------
Features:
diff --git a/gama/GamaClassifier.py b/gama/GamaClassifier.py
index c1332346..f2169834 100644
--- a/gama/GamaClassifier.py
+++ b/gama/GamaClassifier.py
@@ -1,5 +1,5 @@
import inspect
-from typing import Union
+from typing import Union, Optional
import numpy as np
import pandas as pd
@@ -88,17 +88,25 @@ def predict_proba(self, x: Union[pd.DataFrame, np.ndarray]):
x[col] = x[col].astype(self._X[col].dtype)
return self._predict_proba(x)
- def predict_proba_arff(self, arff_file_path: str):
+ def predict_proba_arff(self, arff_file_path: str, target_column: Optional[str] = None):
""" Predict the class probabilities for input in the arff_file, must have empty target column.
- Predict target for X, using the best found pipeline(s) during the `fit` call.
-
- :param arff_file_path: str
+ Parameters
+ ----------
+ arff_file_path: str
+ An ARFF file with the same columns as the one that used in fit.
+ Target column must be present in file, but its values are ignored (can be '?').
+ target_column: str, optional (default=None)
+ Specifies which column the model should predict.
+ If left None, the last column is taken to be the target.
- :return: a numpy array with class probabilities. The array is of shape (N, K) where N is the length of the
+ Returns
+ -------
+ numpy.ndarray
+ Numpy array with class probabilities. The array is of shape (N, K) where N is the length of the
first dimension of X, and K is the number of class labels found in `y` of `fit`.
"""
- X, _ = X_y_from_arff(arff_file_path)
+ X, _ = X_y_from_arff(arff_file_path, target_column)
return self._predict_proba(X)
def fit(self, x, y, *args, **kwargs):
diff --git a/tests/system/test_gamaclassifier.py b/tests/system/test_gamaclassifier.py
index de80dac9..c61c9629 100644
--- a/tests/system/test_gamaclassifier.py
+++ b/tests/system/test_gamaclassifier.py
@@ -35,6 +35,7 @@
breast_cancer_missing = dict(
name='breast_cancer_missing',
load=load_breast_cancer,
+ target='status',
test_size=143,
n_classes=2,
base_accuracy=0.62937,
@@ -98,9 +99,9 @@ def _test_dataset_problem(
y_test = [str(val) for val in y_test]
with Stopwatch() as sw:
- gama.fit_arff(train_path)
- class_predictions = gama.predict_arff(test_path)
- class_probabilities = gama.predict_proba_arff(test_path)
+ gama.fit_arff(train_path, target_column=data['target'])
+ class_predictions = gama.predict_arff(test_path, target_column=data['target'])
+ class_probabilities = gama.predict_proba_arff(test_path, target_column=data['target'])
gama_score = gama.score_arff(test_path)
else:
X, y = data['load'](return_X_y=True)