Skip to content

Commit

Permalink
🐛 Add target_column to predict_proba_arff and include the parameter i…
Browse files Browse the repository at this point in the history
…n unit test.
  • Loading branch information
PGijsbers committed Nov 7, 2019
1 parent 125348b commit df53ecf
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 10 deletions.
5 changes: 5 additions & 0 deletions docs/source/releases.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Release Notes
=============

Version 19.11.2
---------------
Bugfixes:
- `predict_proba_arff` now also accepts a `target_column` as expected from the previous update.

Version 19.11.1
---------------
Features:
Expand Down
22 changes: 15 additions & 7 deletions gama/GamaClassifier.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import inspect
from typing import Union
from typing import Union, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -88,17 +88,25 @@ def predict_proba(self, x: Union[pd.DataFrame, np.ndarray]):
x[col] = x[col].astype(self._X[col].dtype)
return self._predict_proba(x)

def predict_proba_arff(self, arff_file_path: str):
def predict_proba_arff(self, arff_file_path: str, target_column: Optional[str] = None):
""" Predict the class probabilities for input in the arff_file, which must contain a target column (its values are ignored).
Predict target for X, using the best found pipeline(s) during the `fit` call.
:param arff_file_path: str
Parameters
----------
arff_file_path: str
An ARFF file with the same columns as the one that was used in fit.
Target column must be present in file, but its values are ignored (can be '?').
target_column: str, optional (default=None)
Specifies which column the model should predict.
If left None, the last column is taken to be the target.
:return: a numpy array with class probabilities. The array is of shape (N, K) where N is the length of the
Returns
-------
numpy.ndarray
Numpy array with class probabilities. The array is of shape (N, K) where N is the length of the
first dimension of X, and K is the number of class labels found in `y` of `fit`.
"""
X, _ = X_y_from_arff(arff_file_path)
X, _ = X_y_from_arff(arff_file_path, target_column)
return self._predict_proba(X)

def fit(self, x, y, *args, **kwargs):
Expand Down
7 changes: 4 additions & 3 deletions tests/system/test_gamaclassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
breast_cancer_missing = dict(
name='breast_cancer_missing',
load=load_breast_cancer,
target='status',
test_size=143,
n_classes=2,
base_accuracy=0.62937,
Expand Down Expand Up @@ -98,9 +99,9 @@ def _test_dataset_problem(
y_test = [str(val) for val in y_test]

with Stopwatch() as sw:
gama.fit_arff(train_path)
class_predictions = gama.predict_arff(test_path)
class_probabilities = gama.predict_proba_arff(test_path)
gama.fit_arff(train_path, target_column=data['target'])
class_predictions = gama.predict_arff(test_path, target_column=data['target'])
class_probabilities = gama.predict_proba_arff(test_path, target_column=data['target'])
gama_score = gama.score_arff(test_path)
else:
X, y = data['load'](return_X_y=True)
Expand Down

0 comments on commit df53ecf

Please sign in to comment.