From 0e3754fbc4f35a7008cb95cf6939a374fd8d5077 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Sat, 9 Sep 2023 22:53:03 -0700 Subject: [PATCH] [PERF] enable jemalloc with background threads (#1361) * Switches allocator to Jemalloc * Leverages background threads for linux with drop rate of 1 second * Use 65k size pages for linux arm64 --- .github/workflows/python-publish.yml | 2 ++ Cargo.lock | 22 +++++++++++++++++++ Cargo.toml | 4 ++++ src/daft-parquet/src/file.rs | 5 +++-- src/lib.rs | 33 +++++++++++++++++++++++++++- 5 files changed, 63 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index a7dcb6db28..90f76fad2a 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -74,6 +74,8 @@ jobs: # GCC 4.8.5 in manylinux2014 container doesn't support c11 atomic. This caused issues with the `ring` crate that causes TLS to fail container: messense/manylinux_2_24-cross:aarch64 args: --profile release-lto --out dist --sdist + env: + JEMALLOC_SYS_WITH_LG_PAGE: 16 - name: Build wheels - Mac aarch64 if: ${{ (matrix.os == 'macos') && (matrix.compile_arch == 'aarch64') }} diff --git a/Cargo.lock b/Cargo.lock index 440896e99b..a27c57e123 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1006,8 +1006,10 @@ dependencies = [ "daft-parquet", "daft-plan", "daft-table", + "libc", "pyo3", "pyo3-log", + "tikv-jemallocator", ] [[package]] @@ -3330,6 +3332,26 @@ dependencies = [ "weezl", ] +[[package]] +name = "tikv-jemalloc-sys" +version = "0.5.4+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "tikv-jemallocator" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" +dependencies = [ + "libc", + "tikv-jemalloc-sys", +] + [[package]] name = "time" version = "0.1.45" diff --git a/Cargo.toml b/Cargo.toml index b245ba355f..33a87c7198 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,10 @@ lto = 'fat' [profile.rust-analyzer] inherits = "dev" +[target.'cfg(not(target_env = "msvc"))'.dependencies] +libc = {version = "^0.2.8", default-features = false} +tikv-jemallocator = {version = "0.5.4", features = ["disable_initial_exec_tls"]} + [workspace] members = [ "src/common/error", diff --git a/src/daft-parquet/src/file.rs b/src/daft-parquet/src/file.rs index 16a3253420..899823b8c1 100644 --- a/src/daft-parquet/src/file.rs +++ b/src/daft-parquet/src/file.rs @@ -43,7 +43,8 @@ fn streaming_decompression>()); + drop(series_to_concat); let _ = send.send(concated); }); recv.await.context(OneShotRecvSnafu {})? diff --git a/src/lib.rs b/src/lib.rs index d0fa53d663..2339334461 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,37 @@ +#[cfg(not(target_env = "msvc"))] +use tikv_jemallocator::Jemalloc; + +#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + +union U { + x: &'static u8, + y: &'static libc::c_char, +} + +#[cfg(target_env = "gnu")] +#[allow(non_upper_case_globals)] +#[export_name = "_rjem_malloc_conf"] +pub static malloc_conf: Option<&'static libc::c_char> = Some(unsafe { + U { + x: &b"oversize_threshold:1,background_thread:true,dirty_decay_ms:1000,muzzy_decay_ms:1000\0"[0], + } + .y +}); + +#[cfg(target_os = "macos")] +#[allow(non_upper_case_globals)] +#[export_name = "_rjem_malloc_conf"] +pub static malloc_conf: Option<&'static libc::c_char> = Some(unsafe { + U { + x: &b"oversize_threshold:1,background_thread:false,dirty_decay_ms:0,muzzy_decay_ms:0\0"[0], + } + .y +}); + #[cfg(feature = "python")] pub mod pylib { - use pyo3::prelude::*; #[pyfunction]