From b7dac0be8bde6bc196e24914f6c2883996952d53 Mon Sep 17 00:00:00 2001
From: maffettone <pmaffetto@bnl.gov>
Date: Wed, 13 Dec 2023 13:01:07 -0800
Subject: [PATCH] ENH: add 2d kmeans agent

---
 bmm_agents/base.py                            | 10 ++++-
 bmm_agents/sklearn.py                         | 41 ++++++++++++++-----
 .../startup_scripts/historical_mmm4_uids.txt  | 35 ++++++++++++++++
 bmm_agents/startup_scripts/mmm4_Pt_kmeans.py  |  3 +-
 bmm_agents/utils.py                           | 13 ++++++
 5 files changed, 89 insertions(+), 13 deletions(-)
 create mode 100644 bmm_agents/startup_scripts/historical_mmm4_uids.txt

diff --git a/bmm_agents/base.py b/bmm_agents/base.py
index d5fbd3b..d76b98b 100644
--- a/bmm_agents/base.py
+++ b/bmm_agents/base.py
@@ -33,6 +33,7 @@ def __init__(
         exp_bounds: str = "-200 -30 -10 25 12k",
         exp_steps: str = "10 2 0.3 0.05k",
         exp_times: str = "0.5 0.5 0.5 0.5",
+        variable_motor_names: List[str] = ["xafs_x"],
         **kwargs,
     ):
         self._filename = filename
@@ -51,6 +52,7 @@ def __init__(
         self._exp_bounds = exp_bounds
         self._exp_steps = exp_steps
         self._exp_times = exp_times
+        self._variable_motor_names = variable_motor_names
 
         _default_kwargs = self.get_beamline_objects()
         _default_kwargs.update(kwargs)
@@ -199,11 +201,15 @@ def unpack_run(self, run):
             idx_min = np.where(ordinate < self.roi[0])[0][-1] if len(np.where(ordinate < self.roi[0])[0]) else None
             idx_max = np.where(ordinate > self.roi[1])[0][-1] if len(np.where(ordinate > self.roi[1])[0]) else None
             y = y[idx_min:idx_max]
-        return run.baseline.data["xafs_x"][0], y
+        return np.array([run.baseline.data[key][0] for key in self._variable_motor_names]), y
 
     def measurement_plan(self, relative_point: ArrayLike) -> Tuple[str, List, dict]:
         """Works from relative points"""
-        element_positions = self.element_origins + relative_point
+        if len(relative_point) == 2:
+            element_positions = self.element_origins + relative_point
+        else:
+            element_positions = self.element_origins
+            element_positions[0] += relative_point
         args = [
             self.sample_position_motors[0],
             *element_positions[:, 0],
diff --git a/bmm_agents/sklearn.py b/bmm_agents/sklearn.py
index 4cc7e8a..c80b0c7 100644
--- a/bmm_agents/sklearn.py
+++ b/bmm_agents/sklearn.py
@@ -8,9 +8,10 @@
 from numpy.typing import ArrayLike
 from scipy.stats import rv_discrete
 from sklearn.cluster import KMeans
+from sklearn.linear_model import LinearRegression
 
 from .base import BMMBaseAgent
-from .utils import discretize, make_hashable
+from .utils import discretize, make_hashable, make_wafer_grid_list
 
 logger = logging.getLogger(__name__)
 
@@ -121,7 +122,16 @@ def _sample_uncertainty_proxy(self, batch_size=1):
         """
         # Borrowing from Dan's jupyter fun
         # from measurements, perform k-means
-        sorted_independents, sorted_observables = zip(*sorted(zip(self.independent_cache, self.observable_cache)))
+        try:
+            sorted_independents, sorted_observables = zip(
+                *sorted(zip(self.independent_cache, self.observable_cache))
+            )
+        except ValueError:
+            # Multidimensional case
+            sorted_independents, sorted_observables = zip(
+                *sorted(zip(self.independent_cache, self.observable_cache), key=lambda x: (x[0][0], x[0][1]))
+            )
+
         sorted_independents = np.array(sorted_independents)
         sorted_observables = np.array(sorted_observables)
         self.model.fit(sorted_observables)
@@ -131,11 +141,19 @@ def _sample_uncertainty_proxy(self, batch_size=1):
         distances = self.model.transform(sorted_observables)
         # determine golf-score of each point (minimum value)
         min_landscape = distances.min(axis=1)
-        # generate 'uncertainty weights' - as a polynomial fit of the golf-score for each point
-        _x = np.arange(*self.bounds, self.min_step_size)
-        uwx = polyval(_x, polyfit(sorted_independents, min_landscape, deg=5))
-        # Chose from the polynomial fit
-        return pick_from_distribution(_x, uwx, num_picks=batch_size), centers
+        if self.bounds.size == 2:
+            # Assume a 1d scan
+            # generate 'uncertainty weights' - as a polynomial fit of the golf-score for each point
+            _x = np.arange(*self.bounds, self.min_step_size)
+            uwx = polyval(_x, polyfit(sorted_independents, min_landscape, deg=5))
+            # Chose from the polynomial fit
+            return pick_from_distribution(_x, uwx, num_picks=batch_size), centers
+        else:
+            # assume a 2d scan, use a linear model to predict the uncertainty
+            grid = make_wafer_grid_list(*self.bounds.ravel(), step=self.min_step_size)
+            uncertainty_preds = LinearRegression().fit(sorted_independents, min_landscape).predict(grid)
+            top_indicies = np.argsort(uncertainty_preds)[-batch_size:]
+            return grid[top_indicies], centers
 
     def ask(self, batch_size=1):
         suggestions, centers = self._sample_uncertainty_proxy(batch_size)
@@ -144,11 +162,14 @@ def ask(self, batch_size=1):
             suggestions = [suggestions]
         # Keep non redundant suggestions and add to knowledge cache
         for suggestion in suggestions:
-            if suggestion in self.knowledge_cache:
-                logger.info(f"Suggestion {suggestion} is ignored as already in the knowledge cache")
+            hashable_suggestion = make_hashable(discretize(suggestion, self.min_step_size))
+            if hashable_suggestion in self.knowledge_cache:
+                logger.info(
+                    f"Suggestion {suggestion} is ignored as already in the knowledge cache: {hashable_suggestion}"
+                )
                 continue
             else:
-                self.knowledge_cache.add(make_hashable(discretize(suggestion, self.min_step_size)))
+                self.knowledge_cache.add(hashable_suggestion)
                 kept_suggestions.append(suggestion)
 
         base_doc = dict(
diff --git a/bmm_agents/startup_scripts/historical_mmm4_uids.txt b/bmm_agents/startup_scripts/historical_mmm4_uids.txt
new file mode 100644
index 0000000..467c717
--- /dev/null
+++ b/bmm_agents/startup_scripts/historical_mmm4_uids.txt
@@ -0,0 +1,35 @@
+9c1f9ce6-6c35-42c9-ae0b-40620336596c
+b02b4bfc-ef9f-4a8d-bfcd-852923061371
+df21d22a-dc85-4566-932f-d66b20f714bd
+7a5257a9-c5f6-4dab-908e-82ec74308817
+818400f0-b9a7-489d-ba3b-9839d8e35700
+3c8a6931-b465-46e2-860a-bca6a203ac04
+4bbd010d-5310-4fad-ad4e-956517c04aff
+7d7e5497-9fb4-4829-b526-425d42974012
+8e4e5b73-2fad-41ca-b3ca-e1c1d6183052
+797a514b-f673-4131-a236-f5250331f3dd
+434b6f94-37ae-41d3-8d3e-8b4ab18d9711
+ea441617-9794-46f0-8e6e-f704ebba8163
+e8374ec8-2a80-48c4-a77f-bd3271677590
+4a992e79-3f45-4c1c-8a99-a47f7b8d8af5
+cb0629dc-a6ea-4581-abbc-bbde76aecb10
+7fd0e59f-9b06-48f1-a17d-a9053032fe34
+ef501a87-5e09-41aa-a72b-20004e00d510
+dafdf68f-a064-4dd3-acf0-dd6506c0aca7
+1ba7768a-bddb-48ac-9148-1162659c38d0
+60d42219-ab88-4865-ae44-6684e538c322
+6d1be8c4-2534-4e8b-a12e-82875eae3996
+adb51916-d093-44d0-b86c-6397901d4eec
+340e4116-2a30-4a4c-a1a4-04ca7c7657e0
+91ce30b3-03cf-4557-b7a5-c97293dce1be
+ec5023d6-a45d-4109-8d42-7cc0c74d72ed
+2bd7ca7f-4ac4-4ed8-8eac-a5e8aa0aea89
+bd09a4ee-3e36-4f07-b1f8-e9baace2617a
+bd1a9f03-8117-4393-a49f-9ea2c28b53c1
+95364f08-e085-41ad-9b23-6820a850c67f
+5c2d9d83-89e2-481e-818d-73864b792ed6
+58b676df-1f08-4554-8736-ac6ef1fe0422
+0288139d-b373-4525-ad51-f919b4eb5d1a
+adb8d6a8-6b7d-4d38-8c74-26fdc3268519
+803bc7ef-60ba-4c43-962c-6db72b400f6d
+4a9fc081-fd94-45c8-a2e4-e7cfd615d155
diff --git a/bmm_agents/startup_scripts/mmm4_Pt_kmeans.py b/bmm_agents/startup_scripts/mmm4_Pt_kmeans.py
index 8fa950c..ec5461d 100644
--- a/bmm_agents/startup_scripts/mmm4_Pt_kmeans.py
+++ b/bmm_agents/startup_scripts/mmm4_Pt_kmeans.py
@@ -1,3 +1,4 @@
+import numpy as np
 from bluesky_adaptive.server import register_variable, shutdown_decorator, startup_decorator
 
 from bmm_agents.sklearn import MultiElementActiveKmeansAgent
@@ -16,7 +17,7 @@
     exp_bounds="-200 -30 -10 25 13k",
     exp_steps="10 2 0.5 0.05k",
     exp_times="1 1 1 1",
-    bounds=(-32, 32),
+    bounds=np.array([(-32, 32), (-32, 32)]),
     ask_on_tell=False,
     report_on_tell=True,
     k_clusters=6,
diff --git a/bmm_agents/utils.py b/bmm_agents/utils.py
index ecbaa29..a021048 100644
--- a/bmm_agents/utils.py
+++ b/bmm_agents/utils.py
@@ -19,6 +19,19 @@ def make_hashable(x):
         return float(x)
 
 
+def make_wafer_grid_list(x_min, x_max, y_min, y_max, step):
+    """
+    Make the list of all of the possible 2d points that lie within a circle of the origin
+    """
+    x = np.arange(x_min, x_max, step)
+    y = np.arange(y_min, y_max, step)
+    xx, yy = np.meshgrid(x, y)
+    center = np.array([x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2])
+    distance = np.sqrt((xx - center[0]) ** 2 + (yy - center[1]) ** 2)
+    radius = min((x_max - x_min) / 2, (y_max - y_min) / 2)
+    return np.array([xx[distance < radius], yy[distance < radius]]).T
+
+
 class Pandrosus:
     """A thin wrapper around basic XAS data processing for individual
     data sets as implemented in Larch.