Skip to content

Commit

Permalink
address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
hmacr committed Jul 26, 2023
1 parent 5ad97d0 commit 26b8c96
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 5 deletions.
3 changes: 3 additions & 0 deletions src/marqo/tensor_search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,6 @@
# Characters handled as special in addition to Lucene's official set
# (NOTE(review): inferred from the name; confirm against the call sites).
NON_OFFICIAL_LUCENE_SPECIAL_CHARS = {' '}

# Binary scaling step between two adjacent size units (2 ** 10 == 1024).
NUM_BYTES_IN_KB = 2 ** 10

# Size unit suffixes in ascending order; position i corresponds to
# NUM_BYTES_IN_KB ** i bytes.
SUPPORTED_SIZES_FOR_STATS = [
    "B",
    "KB",
    "MB",
    "GB",
    "TB",
    "PB",
    "EB",
    "ZB",
]
12 changes: 9 additions & 3 deletions src/marqo/tensor_search/tensor_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,17 @@ def _autofill_index_settings(index_settings: dict):

def get_stats(config: Config, index_name: str):
    """Return the document count and human-readable store size of an index.

    Args:
        config: Connection configuration handed to ``HttpRequests``.
        index_name: Name of the index to inspect.

    Returns:
        A dict with two keys: ``"numberOfDocuments"`` (the ``count`` field of
        the ``_count`` response) and ``"size"`` (the index's
        ``total.store.size_in_bytes`` from the ``_stats`` response, rendered
        by ``utils.convert_bytes_to_human_readable_format``).

    Raises:
        errors.IndexNotFoundError: If the ``_stats`` response carries no size
            entry for ``index_name``.
    """
    doc_count = HttpRequests(config).post(path=F"{index_name}/_count")["count"]
    index_stats = HttpRequests(config).get(path=F"{index_name}/_stats")["indices"]
    try:
        size_in_bytes = index_stats[index_name]["total"]["store"]["size_in_bytes"]
    except (KeyError, TypeError) as err:
        # A missing key in the nested mapping raises KeyError (TypeError if a
        # level is not subscriptable); AttributeError is never raised by these
        # lookups, so catching it left this translation unreachable.
        raise errors.IndexNotFoundError(
            message="Tried to get a non-existent index: {}".format(index_name)
        ) from err

    formatted_size = utils.convert_bytes_to_human_readable_format(size_in_bytes)
    return {
        "numberOfDocuments": doc_count,
        "size": formatted_size,
    }


Expand Down
7 changes: 7 additions & 0 deletions src/marqo/tensor_search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import typing
import functools
import json
import math
from timeit import default_timer as timer
import torch
from marqo import errors
Expand Down Expand Up @@ -349,3 +350,9 @@ def is_tensor_field(field: str,
return field in tensor_fields
else:
return field not in non_tensor_fields


def convert_bytes_to_human_readable_format(size_in_bytes: int) -> str:
    """Render a byte count as a two-decimal value with a binary unit suffix.

    The value is scaled by powers of ``constants.NUM_BYTES_IN_KB`` and
    labelled with the matching entry of
    ``constants.SUPPORTED_SIZES_FOR_STATS``, e.g. ``16121`` -> ``"15.74 KB"``.

    Args:
        size_in_bytes: Non-negative size in bytes.

    Returns:
        The formatted size, e.g. ``"0.00 B"`` or ``"9.42 MB"``. Sizes beyond
        the largest supported unit are expressed in that largest unit.

    Raises:
        ValueError: If ``size_in_bytes`` is negative.
    """
    if size_in_bytes < 0:
        raise ValueError(f"size_in_bytes must be non-negative, got {size_in_bytes}")

    units = constants.SUPPORTED_SIZES_FOR_STATS
    base = constants.NUM_BYTES_IN_KB
    largest_factor = len(units) - 1

    # Find the unit with integer arithmetic: a float log ratio fails on 0
    # (math domain error) and can land just below an integer at exact powers
    # of the base, which would select the wrong unit.
    size_factor = 0
    remaining = size_in_bytes
    while remaining >= base and size_factor < largest_factor:
        remaining //= base
        size_factor += 1

    processed_size = size_in_bytes / base ** size_factor
    return f"{processed_size:.2f} {units[size_factor]}"
8 changes: 6 additions & 2 deletions tests/tensor_search/test_get_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def test_get_stats_empty(self):
except IndexNotFoundError as s:
pass
tensor_search.create_vector_index(config=self.config, index_name=self.index_name_1)
assert tensor_search.get_stats(config=self.config, index_name=self.index_name_1)["numberOfDocuments"] == 0
index_stats = tensor_search.get_stats(config=self.config, index_name=self.index_name_1)
assert index_stats["numberOfDocuments"] == 0
assert len(index_stats["size"]) != 0

def test_get_stats_non_empty(self):
try:
Expand All @@ -35,4 +37,6 @@ def test_get_stats_non_empty(self):
auto_refresh=True, device="cpu"
)
)
assert tensor_search.get_stats(config=self.config, index_name=self.index_name_1)["numberOfDocuments"] == 3
index_stats = tensor_search.get_stats(config=self.config, index_name=self.index_name_1)
assert index_stats["numberOfDocuments"] == 3
assert len(index_stats["size"]) != 0
19 changes: 19 additions & 0 deletions tests/tensor_search/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,3 +398,22 @@ def test_is_tensor_field_providing_one_empty(self):
non_tensor_fields = []
with self.assertRaises(errors.InternalError):
utils.is_tensor_field('field1', tensor_fields=tensor_fields, non_tensor_fields=non_tensor_fields)

def test_convert_bytes_to_human_readable_format(self):
    """Check that byte counts render with the expected unit and two decimals."""
    # (raw byte count, expected rendering) -- one case per unit from B to PB.
    expectations = (
        (1000, "1000.00 B"),
        (16121, "15.74 KB"),
        (9874321, "9.42 MB"),
        (10000000000, "9.31 GB"),
        (712893712304234, "648.37 TB"),
        (6212893712323224, "5.52 PB"),
    )
    for raw_size, rendered in expectations:
        assert utils.convert_bytes_to_human_readable_format(raw_size) == rendered

0 comments on commit 26b8c96

Please sign in to comment.