From 438f8a8eb531ada6d4c1aac7d61ddb96eec3f274 Mon Sep 17 00:00:00 2001 From: randersenyb Date: Mon, 22 Jul 2024 18:28:24 -0700 Subject: [PATCH 1/2] Added support so that the ANN distance type is displayed. Added work-around with opentele issue... --- .vscode/launch.json | 4 ++-- aerospike/aerospikehdf.py | 15 ++++++++----- aerospike/baseaerospike.py | 44 ++++++++++++++++++++++++-------------- aerospike/distance.py | 13 +++++++---- 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e5347da64..3ae4b551e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -215,8 +215,8 @@ "args": [ "--dataset", "gist-960-euclidean", "--logfile", "./hdfquery-gist1.log", - "-r", "10", - "--limit", "100", + "-r", "1", + "--limit", "10", "--idxname", "ANN-data_euclidean_SQUARED_EUCLIDEAN_960_16_100_100_Idx" ], "console": "integratedTerminal" diff --git a/aerospike/aerospikehdf.py b/aerospike/aerospikehdf.py index 8d00849a7..f034f0159 100644 --- a/aerospike/aerospikehdf.py +++ b/aerospike/aerospikehdf.py @@ -220,7 +220,7 @@ def parse_arguments_query(parser: argparse.ArgumentParser) -> None: default="k-nn", type=str, choices=METRICS.keys(), - ) + ) BaseAerospike.parse_arguments(parser) @@ -235,7 +235,6 @@ def __init__(self, runtimeArgs: argparse.Namespace, actions: OperationActions): self._queryarray : Union[np.ndarray, List[np.ndarray]] = None self._neighbors : Union[np.ndarray, List[np.ndarray]] = None self._distances : Union[np.ndarray, List[np.ndarray]] = None - self._ann_distance : str = None self._dataset = None self._pausePuts : bool = False self._pks : Union[np.ndarray, List] = None @@ -327,9 +326,14 @@ async def get_dataset(self) -> None: self._dimensions, self._pks) = load_and_transform_dataset(self._datasetname, self._hdf_file) - self._ann_distance = distance.lower() + distance = distance.lower() + if self._ann_distance is None: + self._ann_distance = distance + elif distance != self._ann_distance: + 
self.print_log(f"ANN distance types do not match! Found: {distance} Provided: {self._ann_distance}. Distance calculations could be wrong!", logging.WARN) + if self._idx_distance is None or not self._idx_distance: - self._idx_distance = DistanceMaps.get(self._ann_distance) + self._idx_distance = DistanceMaps.get(distance) if self._idx_distance is None or not self._idx_distance: raise ValueError(f"Distance Map '{distance}' was not found.") @@ -783,7 +787,7 @@ async def query(self) -> None: self._query_metric_big_value = bigknn((self._neighbors,self._distances), self._query_neighbors, len(self._query_neighbors[0]), self._query_metric_bigann_result).attrs["mean"] metricValuesBig.append(self._query_metric_big_value) - self._logger.info(f"Run: {i}, Neighbors: {len(self._query_neighbors)}, {self._query_metric['type']}: {self._query_metric_value}, aerospike recall: {self._aerospike_metric_value}, Big: {self._query_metric_big_value}") + self._logger.info(f"Run: {i}, Neighbors: {len(self._query_neighbors)}, {'No distance' if distancemetric is None else distancemetric.type} {self._query_metric['type']}: {self._query_metric_value}, aerospike recall: {self._aerospike_metric_value}, Big: {self._query_metric_big_value}") i += 1 @@ -922,6 +926,7 @@ async def query_run(self, client:vectorASyncClient, runNbr:int, distancemetric : if distancemetric is not None: try: distances = [float(distancemetric.distance(searchValues, self._get_orginal_vector_from_pk(idx))) for idx in result_ids] + if self._query_check and not await self._check_query_distances(distances, aerospike_distances, len(rundistance), runNbr): if len(msg) == 0: msg = "Warn: Distances don't match" diff --git a/aerospike/baseaerospike.py b/aerospike/baseaerospike.py index 7b3ce35b8..25a51a556 100644 --- a/aerospike/baseaerospike.py +++ b/aerospike/baseaerospike.py @@ -6,7 +6,7 @@ import argparse from enum import Flag, auto -from typing import List, Dict, Union +from typing import List, Dict, Union, OrderedDict from 
importlib.metadata import version from logging import _nameToLevel as LogLevels from threading import Thread @@ -18,6 +18,7 @@ from opentelemetry.sdk.metrics import MeterProvider, Meter from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.resources import SERVICE_NAME, Resource +from opentelemetry.util.types import Attributes from aerospike_vector_search import types as vectorTypes from metrics import all_metrics as METRICS @@ -154,6 +155,7 @@ def __init__(self, runtimeArgs: argparse.Namespace, logger: logging.Logger): self._idx_binName : str = None self._idx_distance = None + self._ann_distance : str = None self._idx_hnswparams : vectorTypes.HnswParams = None self._query_hnswparams : vectorTypes.HnswSearchParams = None @@ -227,6 +229,8 @@ def _prometheus_init(self, runtimeArgs: argparse.Namespace) -> None: self._prometheus_heartbeat_gauge = self._meter.create_gauge("aerospike.hdf.heartbeat") self._prometheus_hb : int = runtimeArgs.prometheushb + + self._heartbeat_current_stage : int = -1 def _logging_init(self, runtimeArgs: argparse.Namespace, logger: logging.Logger) -> None: @@ -283,30 +287,37 @@ def prometheus_status(self, done:bool = False) -> None: self.__cnthb__ += 1 if self._heartbeat_stage == 0: + if self._heartbeat_current_stage == self._heartbeat_stage: + return + self._heartbeat_current_stage = self._heartbeat_stage self._prometheus_heartbeat_gauge.set(self.__cnthb__, {"paused": "Starting" }) return if self._heartbeat_stage == 1: - attrs = {"dims": self._dimensions, - "poprecs": None if self._trainarray is None else len(self._trainarray), - "queries": None if self._queryarray is None else len(self._queryarray), - "querynbrlmt": self._query_nbrlimit, - "queryruns": self._query_runs, - "dataset":self._datasetname, - "paused":"Cellecting", - "action": None if self._actions is None else self._actions.name, - "hnswparams": self.hnswstr() - } + if self._heartbeat_current_stage == self._heartbeat_stage: + return + 
self._heartbeat_current_stage = self._heartbeat_stage + attrs : Dict = {"dims": self._dimensions, + "poprecs": None if self._trainarray is None else len(self._trainarray), + "queries": None if self._queryarray is None else len(self._queryarray), + "querynbrlmt": self._query_nbrlimit, + "queryruns": self._query_runs, + "dataset":self._datasetname, + "paused":"Cellecting", + "action": None if self._actions is None else self._actions.name, + "hnswparams": self.hnswstr() + } if self._namespace is not None: - attrs["ns"] = self._namespace - attrs["set"] = self._setName + attrs.update({"ns": self._namespace, + "set": self._setName}) if self._idx_namespace is not None: - attrs["idxns"] = self._idx_namespace - attrs["idx"] = self._idx_name - + attrs.update({"idxns": self._idx_namespace, + "idx": self._idx_name}) + self._prometheus_heartbeat_gauge.set(self.__cnthb__, attrs) + return pausestate : str = None @@ -336,6 +347,7 @@ def prometheus_status(self, done:bool = False) -> None: "idx":self._idx_name, "idxbin":self._idx_binName, "idxdist": None if self._idx_distance is None else self._idx_distance.name, + "anndist": self._ann_distance, "dims": self._dimensions, "poprecs": None if self._trainarray is None else len(self._trainarray), "queries": None if self._queryarray is None else len(self._queryarray), diff --git a/aerospike/distance.py b/aerospike/distance.py index 8bc4b6b4a..c587751de 100644 --- a/aerospike/distance.py +++ b/aerospike/distance.py @@ -24,23 +24,28 @@ def euclidean(a, b): class Metric(NamedTuple): distance: Callable[[np.ndarray, np.ndarray], float] distance_valid: Callable[[float], bool] + type: str metrics = { "hamming": Metric( distance=lambda a, b: np.sum(a.astype(np.bool_) ^ b.astype(np.bool_)), - distance_valid=lambda a: True + distance_valid=lambda a: True, + type='hamming' ), "jaccard": Metric( distance=lambda a, b: 1 - jaccard(a, b), - distance_valid=lambda a: a < 1 - 1e-5 + distance_valid=lambda a: a < 1 - 1e-5, + type="jaccard" ), "euclidean": 
Metric( distance=lambda a, b: euclidean(a, b), - distance_valid=lambda a: True + distance_valid=lambda a: True, + type='euclidean' ), "angular": Metric( distance=lambda a, b: 1 - np.dot(a, b) / (norm(a) * norm(b)), - distance_valid=lambda a: True + distance_valid=lambda a: True, + type='angular' ), } From 385681a55c7a4c8d84f07cfa0c45a70788f18492 Mon Sep 17 00:00:00 2001 From: randersenyb Date: Mon, 22 Jul 2024 18:29:59 -0700 Subject: [PATCH 2/2] Updated dashboard --- aerospike/AerospikeHDFDashboard.json | 365 +++++++++++++--------------- 1 file changed, 169 insertions(+), 196 deletions(-) diff --git a/aerospike/AerospikeHDFDashboard.json b/aerospike/AerospikeHDFDashboard.json index 1c90073c9..78c33b144 100644 --- a/aerospike/AerospikeHDFDashboard.json +++ b/aerospike/AerospikeHDFDashboard.json @@ -131,11 +131,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "fields": "dataset", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -206,11 +204,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "fields": "idxdist", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -281,11 +277,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "fields": "poprecs", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -356,11 +350,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "fields": "dims", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -434,7 +426,7 @@ "lastNotNull" ], "fields": "hnswparams", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -524,7 +516,7 @@ "reduceOptions": { "calcs": [], "fields": "", - "values": 
true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -597,7 +589,7 @@ "reduceOptions": { "calcs": [], "fields": "/^fullname$/", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -670,7 +662,7 @@ "reduceOptions": { "calcs": [], "fields": "/^fullname$/", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -701,7 +693,7 @@ "type": "stat" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -709,131 +701,132 @@ "y": 9 }, "id": 660, - "panels": [], - "title": "Exceptions", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": -1, - "drawStyle": "line", - "fillOpacity": 16, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "stepAfter", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "fieldMinMax": false, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "locale" - }, - "overrides": [] - }, - "gridPos": { - "h": 13, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 640, - "options": { - "legend": { - "calcs": [ - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "10.4.2", - "targets": [ + "panels": [ { "datasource": { "type": 
"prometheus", "uid": "${DS_PROMETHEUS}" }, - "disableTextWrap": false, - "editorMode": "code", - "exemplar": false, - "expr": "aerospike_hdf_exception{ns=\"$idxns\", instance=~\"$client\", idx=\"$idxname\", run=~\"$QueryRun\"}", - "format": "time_series", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "{{instance}} - {{ns}}.{{idx}} Run: {{run}} {{exception_type}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": -1, + "drawStyle": "line", + "fillOpacity": 16, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [] }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "aerospike_hdf_exception{ns=\"$namespace\", instance=~\"$client\", set=\"$set\"}", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "{{instance}} - {{ns}}.{{set}} {{exception_type}}", - "range": true, - "refId": "B", - "useBackend": false + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 640, + "options": { + "legend": { + "calcs": [ + "max", + "lastNotNull" + ], + "displayMode": 
"table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "aerospike_hdf_exception{ns=\"$idxns\", instance=~\"$client\", idx=\"$idxname\", run=~\"$QueryRun\"}", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{instance}} - {{ns}}.{{idx}} Run: {{run}} {{exception_type}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "aerospike_hdf_exception{ns=\"$namespace\", instance=~\"$client\", set=\"$set\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{instance}} - {{ns}}.{{set}} {{exception_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Exceptions", + "type": "timeseries" } ], "title": "Exceptions", - "type": "timeseries" + "type": "row" }, { "collapsed": false, @@ -841,7 +834,7 @@ "h": 1, "w": 24, "x": 0, - "y": 23 + "y": 10 }, "id": 651, "panels": [], @@ -904,7 +897,7 @@ "h": 5, "w": 3, "x": 0, - "y": 24 + "y": 11 }, "id": 649, "options": { @@ -913,11 +906,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "fields": "paused", - "values": true + "values": false }, "showPercentChange": false, "textMode": "auto", @@ -981,7 +972,7 @@ "h": 5, "w": 3, "x": 3, - "y": 24 + "y": 11 }, "id": 626, "options": { @@ -991,7 +982,7 @@ "reduceOptions": { "calcs": [], "fields": "", - "values": true + "values": false }, "showThresholdLabels": false, "showThresholdMarkers": false, @@ -1157,7 +1148,7 @@ "h": 9, "w": 24, "x": 0, - 
"y": 29 + "y": 16 }, "id": 638, "options": { @@ -1221,7 +1212,7 @@ "h": 1, "w": 24, "x": 0, - "y": 38 + "y": 25 }, "id": 652, "panels": [], @@ -1260,7 +1251,7 @@ "h": 5, "w": 3, "x": 0, - "y": 39 + "y": 26 }, "id": 646, "options": { @@ -1269,11 +1260,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "queryruns", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -1335,7 +1324,7 @@ "h": 5, "w": 3, "x": 3, - "y": 39 + "y": 26 }, "id": 650, "options": { @@ -1344,11 +1333,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "queries", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -1410,7 +1397,7 @@ "h": 5, "w": 3, "x": 6, - "y": 39 + "y": 26 }, "id": 647, "options": { @@ -1419,11 +1406,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "querynbrlmt", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -1484,7 +1469,7 @@ "h": 5, "w": 2, "x": 9, - "y": 39 + "y": 26 }, "id": 659, "options": { @@ -1495,7 +1480,7 @@ "reduceOptions": { "calcs": [], "fields": "queryef", - "values": true + "values": false }, "showPercentChange": false, "textMode": "value", @@ -1558,7 +1543,7 @@ "h": 5, "w": 3, "x": 11, - "y": 39 + "y": 26 }, "id": 648, "options": { @@ -1568,7 +1553,7 @@ "reduceOptions": { "calcs": [], "fields": "", - "values": true + "values": false }, "showThresholdLabels": false, "showThresholdMarkers": false, @@ -1678,7 +1663,7 @@ "h": 5, "w": 2, "x": 14, - "y": 39 + "y": 26 }, "id": 653, "options": { @@ -1687,11 +1672,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], - "fields": "querymetric", - "values": true + "calcs": [], + "fields": "/^recalltype$/", + "values": 
false }, "showPercentChange": false, "text": {}, @@ -1706,9 +1689,9 @@ "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "aerospike_hdf_heartbeat{idx=\"$idxname\", instance=~\"$client\", idxns=\"$idxns\"}", + "expr": "label_join(aerospike_hdf_heartbeat{idx=\"$idxname\", instance=~\"$client\", idxns=\"$idxns\"}, \"recalltype\", \"-\", \"anndist\",\"querymetric\")", "format": "table", "fullMetaSearch": false, "hide": false, @@ -1769,7 +1752,7 @@ "h": 5, "w": 2, "x": 16, - "y": 39 + "y": 26 }, "id": 654, "options": { @@ -1778,11 +1761,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "querymetricvalue", - "values": true + "values": false }, "showPercentChange": false, "text": {}, @@ -1860,7 +1841,7 @@ "h": 5, "w": 3, "x": 18, - "y": 39 + "y": 26 }, "id": 661, "options": { @@ -1869,11 +1850,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "querymetricaerospikevalue", - "values": true + "values": false }, "showPercentChange": false, "text": {}, @@ -1951,7 +1930,7 @@ "h": 5, "w": 3, "x": 21, - "y": 39 + "y": 26 }, "id": 657, "options": { @@ -1960,11 +1939,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "firstNotNull" - ], + "calcs": [], "fields": "/^querymetricbigvalue$/", - "values": true + "values": false }, "showPercentChange": false, "text": {}, @@ -2059,7 +2036,7 @@ "h": 13, "w": 24, "x": 0, - "y": 44 + "y": 31 }, "id": 610, "options": { @@ -2156,7 +2133,7 @@ "h": 15, "w": 24, "x": 0, - "y": 57 + "y": 44 }, "id": 656, "options": { @@ -2261,10 +2238,6 @@ }, { "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, "definition": "label_values(aerospike_hdf_heartbeat,ns)", "hide": 0, "includeAll": true, @@ -2413,6 +2386,6 @@ "timezone": "", "title": 
"Aerospike HDF", "uid": "fzUPYeJIkhdf", - "version": 12, + "version": 18, "weekStart": "" } \ No newline at end of file