From bc6a9c87e3afc495da26f80cbbd28c969f0e3dc5 Mon Sep 17 00:00:00 2001 From: Gregory Conrad Date: Wed, 12 Jul 2023 19:13:25 -0700 Subject: [PATCH] fix: attempt to use exp backoff for map size (#233) --- packages/mimir/native/src/embedded_milli.rs | 13 ++++---- .../mimir/native/src/embedded_milli/v1.rs | 32 +++++++++++++++++-- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/packages/mimir/native/src/embedded_milli.rs b/packages/mimir/native/src/embedded_milli.rs index d71b1e8..311ac32 100644 --- a/packages/mimir/native/src/embedded_milli.rs +++ b/packages/mimir/native/src/embedded_milli.rs @@ -27,17 +27,16 @@ pub(crate) type Document = serde_json::Map; // Represents a dump from a milli index type Dump = (MimirIndexSettings, Vec); -// The following two constants are for the map size used in heed/LMDB. +// The following constants are for the map size used in heed/LMDB. // We assume any OS we run on will have a page size less than 16 MiB (2^24) // and that 16 MiB will be a multiple of the OS page size (which it should be). -// Then, we find the maximum multiple of MAX_OS_PAGE_SIZE that is less than MAX_POSSIBLE_SIZE. // MAX_POSSIBLE_SIZE complies with memory constraints imposed by iOS without extra entitlements. -#[cfg(target_os = "ios")] -const MAX_POSSIBLE_SIZE: usize = 1_250_000_000; -#[cfg(not(target_os = "ios"))] const MAX_POSSIBLE_SIZE: usize = 2_000_000_000; const MAX_OS_PAGE_SIZE: usize = 16_777_216; -const MAX_MAP_SIZE: usize = MAX_POSSIBLE_SIZE - (MAX_POSSIBLE_SIZE % MAX_OS_PAGE_SIZE); +const MAX_METADATA_DB_SIZE: usize = 33_554_432; +// These are needed because of iOS nonsense; see: https://github.com/GregoryConrad/mimir/issues/227 +const MAP_EXP_BACKOFF_AMOUNT: f32 = 0.85; +const MAP_SIZE_TRIES: i32 = 8; /// Defines what an embedded instance of milli should be able to do. /// Essentially a wrapper around different versions of milli to expose a common API. @@ -97,7 +96,7 @@ impl Instance { fs::create_dir_all(&path)?; let env = heed::EnvOpenOptions::new() - .map_size(MAX_MAP_SIZE) + .map_size(MAX_METADATA_DB_SIZE) .max_dbs(128) .max_readers(4096) .open(&path)?; diff --git a/packages/mimir/native/src/embedded_milli/v1.rs b/packages/mimir/native/src/embedded_milli/v1.rs index 119e4e4..349ca9d 100644 --- a/packages/mimir/native/src/embedded_milli/v1.rs +++ b/packages/mimir/native/src/embedded_milli/v1.rs @@ -10,15 +10,43 @@ use milli::{ use crate::api::{Filter, MimirIndexSettings, SortBy, Synonyms, TermsMatchingStrategy}; -use super::{Document, Dump}; +use super::{ + Document, Dump, MAP_EXP_BACKOFF_AMOUNT, MAP_SIZE_TRIES, MAX_OS_PAGE_SIZE, MAX_POSSIBLE_SIZE, +}; pub(crate) struct EmbeddedMilli; impl super::EmbeddedMilli for EmbeddedMilli { fn create_index(dir: &std::path::Path) -> Result { std::fs::create_dir_all(dir)?; + + // We need this exponential backoff retry crap due to iOS' limited address space, + // *despite iOS being 64 bit*. See https://github.com/GregoryConrad/mimir/issues/227 + let mut map_size; + let mut retry = 0; + loop { + // Find the maximum multiple of MAX_OS_PAGE_SIZE that is less than curr_max_map_size. + let curr_max_map_size = + (MAX_POSSIBLE_SIZE as f32 * MAP_EXP_BACKOFF_AMOUNT.powi(retry)) as usize; + map_size = curr_max_map_size - (curr_max_map_size % MAX_OS_PAGE_SIZE); + let env_result = heed::EnvOpenOptions::new().map_size(map_size).open(dir); + match env_result { + Ok(env) => { + env.prepare_for_closing(); + break; + } + Err(_) if retry < MAP_SIZE_TRIES => { + retry += 1; + continue; + } + err @ Err(_) => { + err?; + } + }; + } + let mut options = heed::EnvOpenOptions::new(); - options.map_size(super::MAX_MAP_SIZE); + options.map_size(map_size); Index::new(options, dir).map_err(anyhow::Error::from) }