Skip to content

Commit

Permalink
Fixed index management tests
Browse files Browse the repository at this point in the history
  • Loading branch information
papa99do committed Aug 5, 2024
1 parent 1a5d466 commit 4664309
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def normalize(elem: ET.Element):
# Sort attributes and child elements to normalize
normalized = ET.Element(elem.tag, dict(sorted(elem.attrib.items())))
children = [normalize(child) for child in elem.findall('*')]
for child in sorted(children, key=lambda x: (x.tag, x.attrib)):
for child in sorted(children, key=lambda x: str((x.tag, x.attrib))):
normalized.append(child)
return normalized

Expand Down
54 changes: 25 additions & 29 deletions tests/core/index_management/test_index_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,29 @@
from pathlib import Path
from unittest import mock
from datetime import datetime
from unittest.mock import patch

import httpx

import xml.etree.ElementTree as ET

import pytest

from marqo import version
from marqo.core.exceptions import IndexExistsError, ApplicationNotInitializedError, InternalError
from marqo.core.exceptions import IndexNotFoundError
from marqo.core.index_management.index_management import IndexManagement
from marqo.core.index_management.vespa_application_package import MarqoConfig, VespaApplicationPackage, \
ApplicationPackageDeploymentSessionStore
from marqo.core.index_management.vespa_application_package import (MarqoConfig, VespaApplicationPackage,
ApplicationPackageDeploymentSessionStore)
from marqo.core.models.marqo_index import *
from marqo.core.models.marqo_index_request import FieldRequest
from marqo.core.vespa_schema import for_marqo_index_request as vespa_schema_factory
from marqo.s2_inference.s2_inference import get_model_properties_from_registry
from marqo.vespa.exceptions import VespaActivationConflictError
from marqo.vespa.models import VespaDocument
from tests.marqo_test import MarqoTestCase


class IndexResilienceError(Exception):
    """Custom exception raised when the index resilience test runs into an error."""


@pytest.mark.slowtest
class TestIndexManagement(MarqoTestCase):

def setUp(self):
Expand Down Expand Up @@ -156,16 +153,6 @@ def test_rollback(self):
self.index_management.rollback_vespa()

rolled_back_version = str(self.vespa_client.download_application())
self._assert_file_does_not_exist(rolled_back_version, 'marqo_config.json')

# rollback will back up the content in the latest version
# TODO how to test added config?
self._assert_file_exists(rolled_back_version, 'app_bak.tgz')
backup_dir = tempfile.mkdtemp()
with tarfile.open(os.path.join(rolled_back_version, 'app_bak.tgz'), mode='r:gz') as tar:
for member in tar.getmembers():
tar.extract(member, path=backup_dir)

# Test the rollback rolls back the configs and component jar files to previous version
expected_rolled_back_files = [
['services.xml'],
Expand All @@ -177,6 +164,15 @@ def test_rollback(self):
os.path.join(rolled_back_version, *file),
os.path.join(self._test_dir, 'existing_vespa_app', *file)
)
# marqo_config.json does not exist in the previous version, and it gets deleted
self._assert_file_does_not_exist(rolled_back_version, 'marqo_config.json')

# rollback backs up the content in the latest version,
self._assert_file_exists(rolled_back_version, 'app_bak.tgz')
backup_dir = tempfile.mkdtemp()
with tarfile.open(os.path.join(rolled_back_version, 'app_bak.tgz'), mode='r:gz') as tar:
for member in tar.getmembers():
tar.extract(member, path=backup_dir)

# Test the rollback backs up file in the latest version
expected_backup_files = [
Expand All @@ -189,12 +185,6 @@ def test_rollback(self):
os.path.join(latest_version, *file)
)

def test_distributed_lock(self):
pass

def test_prefixes(self):
pass

def test_index_operation_fails_if_disabled(self):
# Create an index management instance with index operation disabled (by default)
self.index_management = IndexManagement(self.vespa_client, zookeeper_client=None)
Expand Down Expand Up @@ -237,7 +227,7 @@ def test_index_operation_fails_if_not_bootstrapped(self):

def test_create_and_delete_index_successful(self):
# merge batch create and delete happy path to save some testing time
request = self.unstructured_marqo_index_request()
request = self.unstructured_marqo_index_request(model=Model(name='hf/e5-small'))
schema, index = vespa_schema_factory(request).generate_schema()
self.index_management.bootstrap_vespa()
self.index_management.create_index(request)
Expand Down Expand Up @@ -366,12 +356,18 @@ def _assert_files_not_equal(self, path1: str, path2: str):
f'Expect file {path1} and {path2} to have different content, but they are the same')

def _assert_index_is_present(self, app, expected_index, expected_schema):
if 'version' not in expected_index:
expected_index = expected_index.copy(update={'version': 1})

# assert index setting exists and equals to expected value
saved_index = self.index_management.get_index(expected_index.name)
self.assertEqual(saved_index, expected_index)
exclude_fields = {'model', 'version'}
self.assertEqual(saved_index.dict(exclude=exclude_fields), expected_index.dict(exclude=exclude_fields))
self.assertEqual(saved_index.version, 1)

# assert that the prefixes are set correctly
model_properties = get_model_properties_from_registry(saved_index.model.name)
if 'text_chunk_prefix' in model_properties:
self.assertEqual(saved_index.model.text_chunk_prefix, model_properties['text_chunk_prefix'])
if 'text_query_prefix' in model_properties:
self.assertEqual(saved_index.model.text_query_prefix, model_properties['text_query_prefix'])

# assert schema file exists and has expected value
schema_name = expected_index.schema_name
Expand Down
3 changes: 2 additions & 1 deletion tests/core/index_management/test_index_setting_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from marqo.core.exceptions import OperationConflictError
from marqo.core.index_management.vespa_application_package import IndexSettingStore
from marqo.core.models.marqo_index import Field, FieldType, MarqoIndex
from marqo.core.models.marqo_index import Field, FieldType, MarqoIndex, Model
from tests.marqo_test import MarqoTestCase


Expand All @@ -16,6 +16,7 @@ def _get_index(self, index_name: str = 'index1', version: Optional[int] = None)
return self.structured_marqo_index(
name=index_name,
schema_name="schema1",
model=Model(name='hf/e5-small'),
fields=[
Field(name='title', type=FieldType.Text)
],
Expand Down
53 changes: 53 additions & 0 deletions tests/core/index_management/test_services_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import textwrap
import unittest
from string import Template

import pytest

from marqo.core.index_management.vespa_application_package import ServiceXml


@pytest.mark.unittest
class TestIndexSettingStore(unittest.TestCase):
    """Unit tests for ``ServiceXml.compare_element`` on the ``content/documents`` element.

    NOTE(review): the class name looks copied from the index setting store tests,
    although every test here exercises ``ServiceXml``. It is kept unchanged so that
    selecting tests by name keeps working -- confirm before renaming.
    """

    # Minimal services.xml skeleton; ``$documents`` is substituted with the
    # <document> children under test.
    _TEMPLATE = Template(textwrap.dedent("""<?xml version="1.0" encoding="utf-8" ?>
<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">
<content id="content_default" version="1.0">
<documents>$documents</documents>
</content>
</services>
"""))

    def test_compare_element_should_return_true_when_equals_semantically(self):
        xml1 = self._TEMPLATE.substitute(documents="""
<document type="marqo__settings" mode="index"/>
<document type="marqo__existing_00index" mode="index"/>
""")

        # Same documents as xml1, but with the child elements and their attributes
        # reordered and one element written with an explicit closing tag.
        # This is still the same semantically.
        xml2 = self._TEMPLATE.substitute(documents="""
<document type="marqo__existing_00index" mode="index"/>
<document mode="index" type="marqo__settings"></document>
""")

        self.assertTrue(ServiceXml(xml1).compare_element(ServiceXml(xml2), 'content/documents'))

    def test_compare_element_should_return_false_when_not_equal_semantically(self):
        xml1 = self._TEMPLATE.substitute(documents="""
<document type="marqo__existing_00index" mode="index"/>
""")

        # Each candidate differs from xml1 in content (not merely in ordering),
        # so the comparison is expected to report a difference.
        different_documents = [
            """<document type="marqo__existing_00index_01" mode="index"/>""",  # different document
            "",  # no documents
            # an extra document
            """<document type="marqo__existing_00index" mode="index"/><document mode="index" type="marqo__settings"/>""",
        ]

        for test_case in different_documents:
            # Passing the case to subTest makes a failure report show which input broke.
            with self.subTest(documents=test_case):
                xml2 = self._TEMPLATE.substitute(documents=test_case)
                self.assertFalse(ServiceXml(xml1).compare_element(ServiceXml(xml2), 'content/documents'))


0 comments on commit 4664309

Please sign in to comment.