Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: containerize mongovector tests #1025

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 112 additions & 77 deletions vectorstores/mongovector/mongovector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@ package mongovector

import (
"context"
"errors"
"flag"
"fmt"
"net"
"net/url"
"os"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/wait"
"github.com/tmc/langchaingo/embeddings"
"github.com/tmc/langchaingo/schema"
"github.com/tmc/langchaingo/vectorstores"
Expand All @@ -20,12 +22,8 @@ import (
"go.mongodb.org/mongo-driver/v2/mongo/options"
)

// Run the test without setting up the test space.
//
//nolint:gochecknoglobals
var testWithoutSetup = flag.Bool("no-atlas-setup", false, "don't create required indexes")

const (
testURI = "MONGODB_VECTOR_TEST_URI"
testDB = "langchaingo-test"
testColl = "vstore"
testIndexDP1536 = "vector_index_dotProduct_1536"
Expand All @@ -35,34 +33,117 @@ const (
testIndexSize3 = 3
)

func TestMain(m *testing.M) {
flag.Parse()
// atlasContainer bundles a testcontainers container handle with the MongoDB
// connection string used to reach it.
type atlasContainer struct {
// Embedded container handle; its methods are promoted onto atlasContainer.
testcontainers.Container
// URI is the MongoDB connection string for the container. It is empty
// until a caller assigns it.
URI string
}

defer func() {
os.Exit(m.Run())
}()
func setupAtlas(ctx context.Context) (*atlasContainer, error) {
req := testcontainers.ContainerRequest{
Image: "mongodb/mongodb-atlas-local",
ExposedPorts: []string{"27017/tcp"},
WaitingFor: wait.ForLog("Waiting for connections").WithStartupTimeout(1 * time.Second),
}

if *testWithoutSetup {
return
container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
ContainerRequest: req,
Started: true,
})
if err != nil {
return nil, err
}

// Create the required vector search indexes for the tests.
var atlasC *atlasContainer
if container != nil {
atlasC = &atlasContainer{Container: container}
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
ip, err := container.Host(ctx)
if err != nil {
return atlasC, err
}

if err := resetForE2E(ctx, testIndexDP1536, testIndexSize1536, nil); err != nil {
fmt.Fprintf(os.Stderr, "setup failed for 1536: %v\n", err)
mappedPort, err := container.MappedPort(ctx, "27017")
if err != nil {
return atlasC, err
}

filters := []string{"pageContent"}
if err := resetForE2E(ctx, testIndexDP1536WithFilter, testIndexSize1536, filters); err != nil {
fmt.Fprintf(os.Stderr, "setup failed for 1536 w filter: %v\n", err)
uri := &url.URL{
Scheme: "mongodb",
Host: net.JoinHostPort(ip, mappedPort.Port()),
Path: "/",
RawQuery: "directConnection=true",
}

if err := resetForE2E(ctx, testIndexDP3, testIndexSize3, nil); err != nil {
fmt.Fprintf(os.Stderr, "setup failed for 3: %v\n", err)
atlasC.URI = uri.String()

return atlasC, nil
}

// resetVectorStore clears the vector space stored in coll by deleting every
// document that has the page-content field set. A delete failure is reported
// on t as a non-fatal assertion failure, so the calling test keeps running.
func resetVectorStore(t *testing.T, coll *mongo.Collection) {
	t.Helper()

	// Match any document in which the page-content field is present.
	fieldPresent := bson.D{{Key: "$exists", Value: true}}
	query := bson.D{{Key: pageContentName, Value: fieldPresent}}

	_, deleteErr := coll.DeleteMany(context.Background(), query)
	assert.NoError(t, deleteErr, "failed to reset vector store")
}

// setupTest prepares the Atlas vector search environment for adding to and
// searching a vector space, and returns a Store bound to the test collection.
//
// The MongoDB URI is read from the environment variable named by testURI.
// When it is unset, a local Atlas container is started via setupAtlas and its
// URI is written back to that variable so later calls in the same process
// reuse the container instead of starting another one.
//
// dim is the dimensionality handed to the mock embedder and index is the name
// of the vector search index the returned Store queries. Any setup failure
// stops the calling test immediately.
func setupTest(t *testing.T, dim int, index string) Store {
	t.Helper()

	uri := os.Getenv(testURI)
	if uri == "" {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
		defer cancel()

		container, err := setupAtlas(ctx)
		require.NoError(t, err)

		uri = container.URI

		// os.Setenv (rather than t.Setenv) is deliberate: the value must
		// outlive this test so subsequent tests find the running container.
		// The container itself is not terminated here.
		require.NoError(t, os.Setenv(testURI, uri), "failed to record container URI")
	}

	require.NotEmpty(t, uri, "URI required")

	client, err := mongo.Connect(options.Client().ApplyURI(uri))
	require.NoError(t, err, "failed to connect to MongoDB server")

	// Release the client's connections once the test (and its subtests) are
	// done; the returned Store keeps using the client until then.
	t.Cleanup(func() {
		disconnectCtx, cancelDisconnect := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancelDisconnect()

		// Best effort: a failed disconnect must not fail an otherwise
		// passing test.
		_ = client.Disconnect(disconnectCtx)
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err = client.Ping(ctx, nil)
	require.NoError(t, err, "failed to ping server")

	time.Sleep(10 * time.Second) // Let the container warm up

	ctx, cancel = context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	// Create the vector search indexes the tests rely on.
	err = resetForE2E(ctx, client, testIndexDP1536, testIndexSize1536, nil)
	require.NoError(t, err)

	filters := []string{"pageContent"}
	err = resetForE2E(ctx, client, testIndexDP1536WithFilter, testIndexSize1536, filters)
	require.NoError(t, err)

	err = resetForE2E(ctx, client, testIndexDP3, testIndexSize3, nil)
	require.NoError(t, err)

	// Create the vectorstore collection
	err = client.Database(testDB).CreateCollection(context.Background(), testColl)
	require.NoError(t, err, "failed to create collection")

	coll := client.Database(testDB).Collection(testColl)
	resetVectorStore(t, coll)

	emb := newMockEmbedder(dim)
	store := New(coll, emb, WithIndex(index))

	return store
}

func TestNew(t *testing.T) {
Expand Down Expand Up @@ -122,44 +203,6 @@ func TestNew(t *testing.T) {
}
}

// resetVectorStore wipes the vector space backed by coll: every document in
// which the page-content field exists is deleted. An error from the delete is
// surfaced through a non-fatal assertion on t.
func resetVectorStore(t *testing.T, coll *mongo.Collection) {
	t.Helper()

	ctx := context.Background()
	exists := bson.D{{Key: "$exists", Value: true}}

	_, err := coll.DeleteMany(ctx, bson.D{{Key: pageContentName, Value: exists}})
	assert.NoError(t, err, "failed to reset vector store")
}

// setupTest prepares the Atlas vector search environment for adding to and
// searching a vector space, and returns a Store bound to the test collection.
//
// The server address is read from the MONGODB_URI environment variable; the
// test is skipped when it is unset. dim is the dimensionality handed to the
// mock embedder and index is the name of the vector search index the returned
// Store queries.
func setupTest(t *testing.T, dim int, index string) Store {
	t.Helper()

	uri := os.Getenv("MONGODB_URI")
	if uri == "" {
		t.Skip("Must set MONGODB_URI to run test")
	}

	require.NotEmpty(t, uri, "MONGODB_URI required")

	client, err := mongo.Connect(options.Client().ApplyURI(uri))
	require.NoError(t, err, "failed to connect to MongoDB server")

	// Release the client's connections once the test (and its subtests) are
	// done; the returned Store keeps using the client until then.
	t.Cleanup(func() {
		// Best effort: a failed disconnect must not fail an otherwise
		// passing test.
		_ = client.Disconnect(context.Background())
	})

	// Create the vectorstore collection
	err = client.Database(testDB).CreateCollection(context.Background(), testColl)
	require.NoError(t, err, "failed to create collection")

	coll := client.Database(testDB).Collection(testColl)
	resetVectorStore(t, coll)

	emb := newMockEmbedder(dim)
	store := New(coll, emb, WithIndex(index))

	return store
}

//nolint:paralleltest
func TestStore_AddDocuments(t *testing.T) {
store := setupTest(t, testIndexSize1536, testIndexDP1536)
Expand Down Expand Up @@ -536,27 +579,19 @@ func searchIndexExists(ctx context.Context, coll *mongo.Collection, idx string)
return false, fmt.Errorf("failed to list search indexes: %w", err)
}

if cursor == nil || cursor.Current == nil {
return false, nil
}

name := cursor.Current.Lookup("name").StringValue()
queryable := cursor.Current.Lookup("queryable").Boolean()

return name == idx && queryable, nil
}

func resetForE2E(ctx context.Context, idx string, dim int, filters []string) error {
uri := os.Getenv("MONGODB_URI")
if uri == "" {
return errors.New("MONGODB_URI required")
}

client, err := mongo.Connect(options.Client().ApplyURI(uri))
if err != nil {
return fmt.Errorf("failed to connect to server: %w", err)
}

defer func() { _ = client.Disconnect(ctx) }()

func resetForE2E(ctx context.Context, client *mongo.Client, idx string, dim int, filters []string) error {
// Create the vectorstore collection
err = client.Database(testDB).CreateCollection(ctx, testColl)
err := client.Database(testDB).CreateCollection(ctx, testColl)
if err != nil {
return fmt.Errorf("failed to create vector store collection: %w", err)
}
Expand Down Expand Up @@ -585,7 +620,7 @@ func resetForE2E(ctx context.Context, idx string, dim int, filters []string) err

_, err = createVectorSearchIndex(ctx, coll, idx, fields...)
if err != nil {
return fmt.Errorf("faield to create index: %w", err)
return fmt.Errorf("failed to create index: %w", err)
}

return nil
Expand Down