Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add warnings when the index size exceeds work_mem #200

Closed
wants to merge 10 commits into from
17 changes: 15 additions & 2 deletions src/hnsw/build.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <catalog/pg_type.h>
#include <executor/executor.h>
#include <funcapi.h>
#include <miscadmin.h>
#include <nodes/execnodes.h>
#include <storage/bufmgr.h>
#include <utils/array.h>
Expand Down Expand Up @@ -59,7 +60,7 @@
#define UpdateProgress(index, val) ((void)val)
#endif

static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildState *buildstate)
static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildState *buildstate, Relation index)
{
/* Detoast once for all calls */
usearch_error_t error = NULL;
Expand Down Expand Up @@ -92,6 +93,12 @@ static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildStat
if(buildstate->usearch_index != NULL) {
size_t capacity = usearch_capacity(buildstate->usearch_index, &error);
if(capacity == usearch_size(buildstate->usearch_index, &error)) {
CheckMem(maintenance_work_mem,
index,
buildstate->usearch_index,
2 * usearch_size(buildstate->usearch_index, &error),
"index size exceeded maintenance_work_mem during index construction, consider increasing "
"maintenance_work_mem");
usearch_reserve(buildstate->usearch_index, 2 * capacity, &error);
assert(error == NULL);
}
Expand Down Expand Up @@ -130,7 +137,7 @@ static void BuildCallback(

// todo:: the argument values is assumed to be a real[] or vector (they have the same layout)
// do proper type checking instead of this assumption, and test int arrays and others
AddTupleToUsearchIndex(tid, values, buildstate);
AddTupleToUsearchIndex(tid, values, buildstate, index);

/* Reset memory context */
MemoryContextSwitchTo(oldCtx);
Expand Down Expand Up @@ -453,6 +460,12 @@ static void BuildIndex(
// Unlock and release buffer
UnlockReleaseBuffer(buffer);
}
CheckMem(maintenance_work_mem,
index,
buildstate->usearch_index,
estimated_row_count,
"index size exceeded maintenance_work_mem during index construction, consider increasing "
"maintenance_work_mem");
usearch_reserve(buildstate->usearch_index, estimated_row_count, &error);
if(error != NULL) {
// There's not much we can do if free throws an error, but we want to preserve the contents of the first one
Expand Down
13 changes: 12 additions & 1 deletion src/hnsw/external_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@
#include "usearch.h"
#include "utils.h"

#if PG_VERSION_NUM >= 130000
#include <miscadmin.h>
#endif

static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int id);

static uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
{
const int NODE_HEAD_BYTES = sizeof(usearch_label_t) + 4 /*sizeof dim */ + 4 /*sizeof level*/;
uint32 node_bytes = 0;
Expand Down Expand Up @@ -629,6 +633,13 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}

#if PG_VERSION_NUM >= 130000
CheckMem(work_mem,
NULL,
NULL,
0,
"Pinned more tuples during node retrieval than will fir in work_mem, cosider increasing work_mem");
#endif
fa_cache_insert(&ctx->fa_cache, id, nodepage->node);

return nodepage->node;
Expand Down
13 changes: 7 additions & 6 deletions src/hnsw/external_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,13 @@ typedef struct
HnswColumnType columnType;
} HnswInsertState;

void StoreExternalIndex(Relation index,
usearch_index_t external_index,
ForkNumber forkNum,
char *data,
usearch_init_options_t *opts,
size_t num_added_vectors);
uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level);
void StoreExternalIndex(Relation index,
usearch_index_t external_index,
ForkNumber forkNum,
char *data,
usearch_init_options_t *opts,
size_t num_added_vectors);

// add the fully constructed index tuple to the index via wal
// hdr is passed in so num_vectors, first_block_no, last_block_no can be updated
Expand Down
7 changes: 7 additions & 0 deletions src/hnsw/insert.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#endif
#include <float.h>
#include <math.h>
#include <miscadmin.h>
#include <storage/bufmgr.h>
#include <utils/array.h>
#include <utils/rel.h>
Expand Down Expand Up @@ -144,6 +145,12 @@ bool ldb_aminsert(Relation index,
assert(hdr->magicNumber == LDB_WAL_MAGIC_NUMBER);
ldb_dlog("Insert: at start num vectors is %d", hdr->num_vectors);

CheckMem(work_mem,
index,
uidx,
hdr->num_vectors,
"index size exceeded work_mem during insert, consider increasing work_mem");

usearch_reserve(uidx, hdr->num_vectors + 1, &error);
uint32 level = hnsw_generate_new_level(meta.connectivity);
if(error != NULL) {
Expand Down
12 changes: 12 additions & 0 deletions src/hnsw/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "scan.h"

#include <access/relscan.h>
#include <miscadmin.h>
#include <pgstat.h>
#include <utils/rel.h>

Expand Down Expand Up @@ -192,6 +193,11 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
scanstate->labels = palloc(k * sizeof(usearch_label_t));
}

CheckMem(work_mem,
scan->indexRelation,
scanstate->usearch_index,
k,
"index size exceeded work_mem during scan, consider increasing work_mem");
ldb_dlog("LANTERN querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
Expand Down Expand Up @@ -227,6 +233,12 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
scanstate->distances = repalloc(scanstate->distances, k * sizeof(float));
scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));

CheckMem(work_mem,
scan->indexRelation,
scanstate->usearch_index,
k,
"index size exceeded work_mem during scan, consider increasing work_mem");

ldb_dlog("LANTERN - querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
Expand Down
33 changes: 33 additions & 0 deletions src/hnsw/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
#include "utils.h"

#include <assert.h>
#include <math.h>
#include <miscadmin.h>
#include <regex.h>
#include <string.h>

#if PG_VERSION_NUM >= 130000
#include <utils/memutils.h>
#endif

#include "external_index.h"
#include "hnsw.h"
#include "options.h"
#include "usearch.h"
Expand Down Expand Up @@ -48,3 +55,29 @@ usearch_label_t GetUsearchLabel(ItemPointer itemPtr)
memcpy((unsigned long *)&label, itemPtr, 6);
return label;
}

/*
 * Emit a WARNING carrying `msg` when the estimated memory footprint exceeds
 * a memory budget.
 *
 * The estimate is the memory currently allocated in CurrentMemoryContext
 * (including child contexts; PostgreSQL >= 13 only, 0 on older versions)
 * plus `n_nodes` times an estimated per-node size.
 *
 * limit   - budget in kilobytes (callers pass work_mem or maintenance_work_mem)
 * index   - index relation used to read the HNSW M parameter; when NULL the
 *           per-node estimate is skipped and only allocated memory is checked
 * uidx    - usearch index whose metadata (dimensions etc.) sizes a node;
 *           ignored when `index` is NULL
 * n_nodes - number of nodes expected to be resident in memory
 * msg     - text of the warning to emit
 *
 * This is advisory only: it never raises an error and never changes the index.
 */
void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, char *msg)
{
    /* 64-bit arithmetic throughout: node_size * n_nodes easily exceeds 4 GiB */
    uint64 node_size = 0;
    uint64 limit_bytes = (uint64)limit * 1024;
    uint64 estimate;

    if(index != NULL && uidx != NULL) {
        /* usearch reports failure by setting the error pointer, so it must start out NULL */
        usearch_error_t    error = NULL;
        usearch_metadata_t meta = usearch_metadata(uidx, &error);

        /* Best effort: if metadata is unavailable, fall back to checking allocated memory only */
        if(error == NULL) {
            double M = ldb_HnswGetM(index);
            /*
             * The average number of layers for an element to be added in is mL+1 per section 4.2.2
             * Accuracy could maybe be improved by not rounding
             * This is a guess, but it's a reasonably good one
             *
             * log(M) is zero or negative for M <= 1, which would make mL infinite or negative
             * and the conversion to int undefined; treat that as a single-layer node.
             */
            double mL = M > 1 ? 1 / log(M) : 0;

            // todo:: update sizeof(float) to correct vector size once #19 is merged
            node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
        }
    }

    // todo:: figure out a way to check this in pg <= 12
#if PG_VERSION_NUM >= 130000
    Size pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
#else
    Size pg_mem = 0;
#endif

    estimate = (uint64)pg_mem + node_size * (uint64)n_nodes;
    if(estimate > limit_bytes) {
        elog(WARNING, "%s", msg);
    }
}
1 change: 1 addition & 0 deletions src/hnsw/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "options.h"
#include "usearch.h"

/*
 * Emits a WARNING with text `msg` when the memory allocated in the current
 * memory context (PostgreSQL >= 13; treated as 0 on older versions) plus an
 * estimated per-node size times `n_nodes` exceeds `limit` kilobytes.
 * The per-node size is derived from `index` (HNSW M parameter) and `uidx`
 * (usearch metadata); when `index` is NULL the per-node estimate is zero.
 * Callers pass work_mem or maintenance_work_mem as `limit`.
 */
void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, char *msg);
void LogUsearchOptions(usearch_init_options_t *opts);
void PopulateUsearchOpts(Relation index, usearch_init_options_t *opts);
usearch_label_t GetUsearchLabel(ItemPointer itemPtr);
Expand Down
1 change: 1 addition & 0 deletions test/expected/hnsw_insert.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---------------------------------------------------------------------
-- Test HNSW index inserts on empty table
---------------------------------------------------------------------
set work_mem = '10MB';
CREATE TABLE small_world (
id SERIAL PRIMARY KEY,
v REAL[2]
Expand Down
1 change: 1 addition & 0 deletions test/parallel/expected/insert.out
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/parallel/expected/insert2.out
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/parallel/expected/insert3.out
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/parallel/sql/insert.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/parallel/sql/insert2.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/parallel/sql/insert3.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SET work_mem='10MB';
BEGIN;
INSERT INTO sift_base10k (id, v) VALUES
(nextval('serial'), random_array(128, 0, 128)),
Expand Down
1 change: 1 addition & 0 deletions test/sql/hnsw_insert.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---------------------------------------------------------------------
-- Test HNSW index inserts on empty table
---------------------------------------------------------------------
set work_mem = '10MB';

CREATE TABLE small_world (
id SERIAL PRIMARY KEY,
Expand Down