ing-bank · ReinierKoops · Jul 6, 2023 · May 24, 2023 · May 24, 2023 · May 24, 2023
diff --git a/skorecard/metrics/metrics.py b/skorecard/metrics/metrics.py
@@ -18,14 +18,18 @@ def woe_1d(X, y, epsilon=0.00001):
         - counts_0: count of entries per bin where y==0
         - counts_1: count of entries per bin where y==1
     """
-    X = X.copy().reset_index(drop=True)
+    # Make sure y has the right number of rows.
+    if y.shape[0] != X.shape[0]:
+        raise ValueError(f"y has {y.shape[0]}, but expected {X.shape[0]}")
+
+    # Make sure y is a pd.Series so we can reset its index.
     if not isinstance(y, pd.Series):
-        if y.shape[0] == X.shape[0]:
-            y = pd.Series(y).reset_index(drop=True)
-        else:
-            raise ValueError(f"y has {y.shape[0]}, but expected {X.shape[0]}")
+        y = pd.Series(y)
+
+    X = X.reset_index(drop=True)
+    y = y.reset_index(drop=True)
 
-    # Ensure classes in y start at zero
+    # Ensure classes in y start at zero.
     y = y - min(y)
 
     df = pd.concat([X, y], axis=1, ignore_index=True)

diff --git a/skorecard/reporting/report.py b/skorecard/reporting/report.py
@@ -127,7 +127,7 @@ def build_bucket_table(
     stats["WoE"] = (event_percentage / non_event_percentage).apply(lambda x: np.log(x))
     stats.loc[stats["Count"] == 0, "WoE"] = np.nan
 
-    stats["IV"] = (stats["% Non-event"] - stats["% Event"]) * stats["WoE"]
+    stats["IV"] = abs((stats["% Non-event"] - stats["% Event"]) * stats["WoE"])
 
     stats["% Event"] = np.round(100 * stats["% Event"], 2)
     stats["% Non-event"] = np.round(100 * stats["% Non-event"], 2)

diff --git a/skorecard/rescale/rescale.py b/skorecard/rescale/rescale.py
@@ -144,12 +144,13 @@ def _calculate_scorecard_points(self):
         scorecard = pd.concat(
             [
                 scorecard,
-                pd.DataFrame(
+                pd.DataFrame.from_records(
                     [{"feature": "Intercept", "coef": self.model.intercept_[0], "bin_index": 0, "map": 0, "woe": 0}]
                 ),
             ],
             ignore_index=True,
         )
+
         #     return buckets, woes
         scorecard["contribution"] = scorecard["woe"] * scorecard["coef"]
 

diff --git a/tests/test_bucket_table_woe_values.py b/tests/test_bucket_table_woe_values.py
@@ -20,3 +20,5 @@ def test_bucket_table_woe_values():
         b_tab_woes = {x for x in b_tab_woes if pd.notna(x)}
         data_woes = set(np.round(X_woe[c].value_counts().index, 3))
         assert b_tab_woes == data_woes
+        iv_lt_zero = [x < 0 for x in bucket_table["IV"]]
+        assert True not in iv_lt_zero
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -68,8 +68,14 @@ def test_psi_values(X1_X2):
 def test_IV_values(X_y):
     """Assert IV values match expectations."""
     X, y = X_y
-    X = pd.DataFrame(X, columns=["col1", "col2"])
+    random_index = [2 * x for x in range(0, len(y))]  # not random: even labels 0, 2, 4, ... (not the default 0..n-1)
+    X = pd.DataFrame(X, columns=["col1", "col2"], index=random_index)
     expected_iv = {"col1": 5.307, "col2": 4.635}
     iv_vals = skorecard.reporting.report.iv(X, y)
+    np.testing.assert_array_almost_equal(pd.Series(expected_iv).values, pd.Series(iv_vals).values, decimal=2)
 
+    # Repeat with y wrapped in a pd.Series that carries the same non-contiguous
+    # index labels as X; the IV values must still match expected_iv.
+    y = pd.Series(y, index=random_index)  # NOTE(review): assumes fixture y is not already a Series - confirm
+    iv_vals = skorecard.reporting.report.iv(X, y)
     np.testing.assert_array_almost_equal(pd.Series(expected_iv).values, pd.Series(iv_vals).values, decimal=2)