Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix KElbow get_params #1251

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
520 changes: 520 additions & 0 deletions examples/bbengfort/corpus.ipynb

Large diffs are not rendered by default.

649 changes: 248 additions & 401 deletions examples/bbengfort/testing.ipynb

Large diffs are not rendered by default.

33 changes: 26 additions & 7 deletions tests/test_cluster/test_elbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,16 @@ def test_invalid_k(self):
"""
Assert that invalid values of K raise exceptions
"""
# Generate a blobs data set
X, y = make_blobs(
n_samples=1000, n_features=12, centers=6, shuffle=True, random_state=42
)

with pytest.raises(YellowbrickValueError):
KElbowVisualizer(KMeans(), k=(1, 2, 3, "foo", 5))
KElbowVisualizer(KMeans(), k=(1, 2, 3, "foo", 5)).fit(X)

with pytest.raises(YellowbrickValueError):
KElbowVisualizer(KMeans(), k="foo")
KElbowVisualizer(KMeans(), k="foo").fit(X)

def test_valid_k(self):
"""
Expand All @@ -220,16 +224,21 @@ def test_valid_k(self):
# if k is a tuple of 2 ints, k_values = range(k[0], k[1])
# if k is an iterable, k_values_ = list(k)

visualizer = KElbowVisualizer(KMeans(), k=8)
# Generate a blobs data set
X, y = make_blobs(
n_samples=1000, n_features=12, centers=6, shuffle=True, random_state=42
)

visualizer = KElbowVisualizer(KMeans(), k=8).fit(X)
assert visualizer.k_values_ == list(np.arange(2, 8 + 1))

visualizer = KElbowVisualizer(KMeans(), k=(4, 12))
visualizer = KElbowVisualizer(KMeans(), k=(4, 12)).fit(X)
assert visualizer.k_values_ == list(np.arange(4, 12))

visualizer = KElbowVisualizer(KMeans(), k=np.arange(10, 100, 10))
visualizer = KElbowVisualizer(KMeans(), k=np.arange(10, 100, 10)).fit(X)
assert visualizer.k_values_ == list(np.arange(10, 100, 10))

visualizer = KElbowVisualizer(KMeans(), k=[10, 20, 30, 40, 50, 60, 70, 80, 90])
visualizer = KElbowVisualizer(KMeans(), k=[10, 20, 30, 40, 50, 60, 70, 80, 90]).fit(X)
assert visualizer.k_values_ == list(np.arange(10, 100, 10))

@pytest.mark.xfail(sys.platform == "win32", reason="images not close on windows")
Expand Down Expand Up @@ -491,4 +500,14 @@ def test_set_colors_manually(self):
# Execute drawing
oz.draw()
oz.finalize()
self.assert_images_similar(oz, tol=3.2)
self.assert_images_similar(oz, tol=3.2)

def test_get_params(self):
    """
    Ensure the get params works for sklearn-compatibility
    """
    # get_params must succeed on an unfitted visualizer so that
    # sklearn tooling (clone, GridSearchCV) can introspect it
    visualizer = KElbowVisualizer(KMeans(random_state=0), k=5)
    assert len(visualizer.get_params()) > 0
10 changes: 9 additions & 1 deletion tests/test_model_selection/test_dropping_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,12 @@ def test_bad_train_sizes(self):
Test learning curve with bad input for feature size.
"""
with pytest.raises(YellowbrickValueError):
DroppingCurve(SVC(), param_name="gamma", feature_sizes=100)
DroppingCurve(SVC(), param_name="gamma", feature_sizes=100)

def test_get_params(self):
    """
    Ensure dropping curve get params works correctly
    """
    # sklearn-compatibility: introspection must work before fit
    curve = DroppingCurve(MultinomialNB())
    assert len(curve.get_params()) > 0
21 changes: 21 additions & 0 deletions tests/test_utils/test_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@
## Imports
##########################################################################

import pytest

from unittest import mock

from yellowbrick.base import Visualizer
from yellowbrick.utils.wrapper import *
from yellowbrick.exceptions import YellowbrickAttributeError, YellowbrickTypeError
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB

Expand Down Expand Up @@ -133,3 +136,21 @@ def test_rewrap_object(self):
obj.predict()
old.predict.assert_called_once()
new.predict.assert_called_once()

def test_wrapper_recursion(self):
    """
    Ensure wrapper recursion isn't possible
    """
    # Point the wrapper at itself; attribute proxying must detect the
    # self-reference and raise rather than recurse forever.
    wrapper = Wrapper("")
    wrapper._wrapped = wrapper
    with pytest.raises(YellowbrickTypeError):
        wrapper.foo

def test_attribute_error(self):
    """
    Attribute errors should return a YellowbrickAttributeError
    """
    estimator = WrappedEstimator()
    # Neither the visualizer nor the mocked estimator defines this
    # attribute, so lookup must fail with the library's own exception.
    pat = r"neither visualizer 'WrappedEstimator' nor wrapped estimator 'MagicMock' have attribute 'notaproperty'"
    with pytest.raises(YellowbrickAttributeError, match=pat):
        estimator.notaproperty
56 changes: 24 additions & 32 deletions yellowbrick/cluster/elbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class KElbowVisualizer(ClusteringScoreVisualizer):
- **calinski_harabasz**: ratio of within to between cluster dispersion

distance_metric : str or callable, default='euclidean'
The metric to use when calculating distance between instances in a
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options allowed
by sklearn's metrics.pairwise.pairwise_distances. If X is the distance array itself,
use metric="precomputed".
Expand Down Expand Up @@ -280,6 +280,7 @@ def __init__(
)

# Store the arguments
self.k = k
self.scoring_metric = KELBOW_SCOREMAP[metric]
self.metric = metric
self.timings = timings
Expand All @@ -293,50 +294,41 @@ def __init__(
CVLINE: LINE_COLOR,
}

# Convert K into a tuple argument if an integer
if isinstance(k, int):
self.k_values_ = list(range(2, k + 1))
def fit(self, X, y=None, **kwargs):
"""
Fits n KMeans models where n is the length of ``self.k_values_``,
storing the silhouette scores in the ``self.k_scores_`` attribute.
The "elbow" and silhouette score corresponding to it are stored in
``self.elbow_value`` and ``self.elbow_score`` respectively.
This method finishes up by calling draw to create the plot.
"""
# Convert K into a tuple argument if an integer
if isinstance(self.k, int):
self.k_values_ = list(range(2, self.k + 1))
elif (
isinstance(k, tuple)
and len(k) == 2
and all(isinstance(x, (int, np.integer)) for x in k)
isinstance(self.k, tuple)
and len(self.k) == 2
and all(isinstance(x, (int, np.integer)) for x in self.k)
):
self.k_values_ = list(range(*k))
elif isinstance(k, Iterable) and all(
isinstance(x, (int, np.integer)) for x in k
self.k_values_ = list(range(*self.k))
elif isinstance(self.k, Iterable) and all(
isinstance(x, (int, np.integer)) for x in self.k
):
self.k_values_ = list(k)
self.k_values_ = list(self.k)
else:
raise YellowbrickValueError(
(
"Specify an iterable of integers, a range, or maximal K value,"
" the value '{}' is not a valid argument for K.".format(k)
" the value '{}' is not a valid argument for K.".format(self.k)
)
)

# Holds the values of the silhoutte scores
self.k_scores_ = None

# Set Default Elbow Value
self.elbow_value_ = None

def fit(self, X, y=None, **kwargs):
"""
Fits n KMeans models where n is the length of ``self.k_values_``,
storing the silhouette scores in the ``self.k_scores_`` attribute.
The "elbow" and silhouette score corresponding to it are stored in
``self.elbow_value`` and ``self.elbow_score`` respectively.
This method finishes up by calling draw to create the plot.
"""

self.k_scores_ = []
self.k_timers_ = []
self.kneedle = None
self.knee_value = None

if self.locate_elbow:
self.elbow_value_ = None
self.elbow_score_ = None
self.elbow_value_ = None
self.elbow_score_ = None

for k in self.k_values_:
# Compute the start time for each model
Expand Down Expand Up @@ -527,7 +519,7 @@ def kelbow_visualizer(
- **calinski_harabasz**: ratio of within to between cluster dispersion

distance_metric : str or callable, default='euclidean'
The metric to use when calculating distance between instances in a
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options allowed
by sklearn's metrics.pairwise.pairwise_distances. If X is the distance array itself,
use metric="precomputed".
Expand Down
2 changes: 1 addition & 1 deletion yellowbrick/model_selection/dropping_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def fit(self, X, y=None):
# compute the mean and standard deviation of the training data
self.train_scores_mean_ = np.mean(self.train_scores_, axis=1)
self.train_scores_std_ = np.std(self.train_scores_, axis=1)

# compute the mean and standard deviation of the validation data
self.valid_scores_mean_ = np.mean(self.valid_scores_, axis=1)
self.valid_scores_std_ = np.std(self.valid_scores_, axis=1)
Expand Down
10 changes: 9 additions & 1 deletion yellowbrick/utils/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
## Wrapper Class
##########################################################################

from yellowbrick.exceptions import YellowbrickAttributeError, YellowbrickTypeError


class Wrapper(object):
"""
Expand All @@ -38,5 +40,11 @@ def __init__(self, obj):
self._wrapped = obj

def __getattr__(self, attr):
if self is self._wrapped:
raise YellowbrickTypeError("wrapper cannot wrap itself or recursion will occur")

# proxy to the wrapped object
return getattr(self._wrapped, attr)
try:
return getattr(self._wrapped, attr)
except AttributeError as e:
raise YellowbrickAttributeError(f"neither visualizer '{self.__class__.__name__}' nor wrapped estimator '{type(self._wrapped).__name__}' have attribute '{attr}'") from e