From dccdefcd7ba1a7fa7861588a37cd8260448fdd59 Mon Sep 17 00:00:00 2001
From: baniasbaabe <banias@hotmail.de>
Date: Sun, 4 Feb 2024 12:37:11 +0100
Subject: [PATCH] Add puncc

---
 book/machinelearning/outlierdetection.ipynb | 59 +++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/book/machinelearning/outlierdetection.ipynb b/book/machinelearning/outlierdetection.ipynb
index 1022f7e..3092056 100644
--- a/book/machinelearning/outlierdetection.ipynb
+++ b/book/machinelearning/outlierdetection.ipynb
@@ -83,6 +83,65 @@
     "    \n",
     "majority_vote(labels)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Robust Outlier Detection with `puncc`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Outlier Detection is notoriously hard.\n",
+    "\n",
+    "But it doesn't have to be.\n",
+    "\n",
+    "`puncc` offers outlier detection powered by Conformal Prediction, which calibrates the detection threshold for a chosen maximum false alarm rate.\n",
+    "\n",
+    "So, false alarms are reduced."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install puncc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import IsolationForest\n",
+    "from deel.puncc.anomaly_detection import SplitCAD\n",
+    "from deel.puncc.api.prediction import BasePredictor\n",
+    "\n",
+    "# We need to redefine the predict to output the nonconformity scores.\n",
+    "class ADPredictor(BasePredictor):\n",
+    "    def predict(self, X):\n",
+    "        return -self.model.score_samples(X)\n",
+    "\n",
+    "# Wrap Isolation Forest in a predictor (random_state fixes the forest's own randomness)\n",
+    "if_predictor = ADPredictor(IsolationForest(random_state=42))\n",
+    "\n",
+    "# Instantiate CAD on top of IF predictor\n",
+    "if_cad = SplitCAD(if_predictor, train=True)\n",
+    "\n",
+    "# NOTE: `dataset` and `new_data` are not defined in this cell - provide your own data to fit/calibrate on and samples to test\n",
+    "if_cad.fit(z=dataset, fit_ratio=0.7)\n",
+    "\n",
+    "# Maximum false detection rate\n",
+    "alpha = 0.01\n",
+    "\n",
+    "results = if_cad.predict(new_data, alpha=alpha)"
+   ]
   }
  ],
  "metadata": {