From e142d024ff2317356f6c3c8e64758c2bf609714b Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Wed, 10 Jul 2024 00:11:18 +0800 Subject: [PATCH] [fix](memory) Disable Arrow Jemalloc (#37528) ## Proposed changes Currently, Arrow uses a separate Jemalloc and calls the non-standard functions `mallocx`, `sdallocx`, and `rallocx` in `memory_pool_jemalloc.cc` to optimize memory allocation. However, this may be incompatible with older versions of the Linux kernel. When we use Arrow on Arm Kylin v10 or CentOS 7.4, it gets stuck on the Jemalloc lock with the stack below; the hang appears when calling `arrow::RecordBatch::MakeEmpty`. The kernel version of Arm Kylin v10 is 4.19.90, and the kernel version of CentOS 7.4 is 4.14. ``` #0 0x0000ffffae3ceff8 in __lll_lock_wait () from /lib64/libpthread.so.0 #1 0x0000ffffae3c9b50 in pthread_mutex_lock () from /lib64/libpthread.so.0 #2 0x0000ffffae61c834 in pthread_mutex_lock () from /lib64/libc.so.6 #3 0x0000aaaac99bc1e0 in je_arrow_private_je_malloc_mutex_lock_slow () #4 0x0000aaaac99af3a4 in ?? () #5 0x0000aaaac99b576c in je_arrow_mallocx () #6 0x0000aaaac99a8aec in ?? () #7 0x0000aaaac99a9858 in arrow::AllocateResizableBuffer(long, arrow::MemoryPool*) () #8 0x0000aaaac399f8b8 in arrow::BufferBuilder::Resize(long, bool) () #9 0x0000aaaac983715c in arrow::BaseBinaryBuilder::Resize(long) () #10 0x0000aaaac39a47e0 in arrow::BaseBinaryBuilder::Append(unsigned char const*, int) () ``` After disabling the separate Jemalloc when compiling Arrow, the above error disappears, and Arrow uses the default memory allocator, which is Doris Jemalloc. 
--- thirdparty/build-thirdparty.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 977406f1a121ef..c45fe62e08fc4a 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1006,7 +1006,6 @@ build_arrow() { export ARROW_LZ4_URL="${TP_SOURCE_DIR}/${LZ4_NAME}" export ARROW_FLATBUFFERS_URL="${TP_SOURCE_DIR}/${FLATBUFFERS_NAME}" export ARROW_ZSTD_URL="${TP_SOURCE_DIR}/${ZSTD_NAME}" - export ARROW_JEMALLOC_URL="${TP_SOURCE_DIR}/${JEMALLOC_ARROW_NAME}" export ARROW_Thrift_URL="${TP_SOURCE_DIR}/${THRIFT_NAME}" export ARROW_SNAPPY_URL="${TP_SOURCE_DIR}/${SNAPPY_NAME}" export ARROW_ZLIB_URL="${TP_SOURCE_DIR}/${ZLIB_NAME}" @@ -1058,7 +1057,8 @@ build_arrow() { -DSnappy_LIB="${TP_INSTALL_DIR}/lib/libsnappy.a" -DSnappy_INCLUDE_DIR="${TP_INSTALL_DIR}/include" \ -DSnappy_SOURCE=SYSTEM \ -DBOOST_ROOT="${TP_INSTALL_DIR}" --no-warn-unused-cli \ - -Djemalloc_SOURCE=BUNDLED \ + -DARROW_JEMALLOC=OFF -DARROW_MIMALLOC=OFF \ + -DJEMALLOC_HOME="${TP_INSTALL_DIR}" \ -DARROW_THRIFT_USE_SHARED=OFF \ -DThrift_SOURCE=SYSTEM \ -DThrift_ROOT="${TP_INSTALL_DIR}" .. @@ -1067,12 +1067,10 @@ build_arrow() { "${BUILD_SYSTEM}" install #copy dep libs - cp -rf ./jemalloc_ep-prefix/src/jemalloc_ep/dist/lib/libjemalloc_pic.a "${TP_INSTALL_DIR}/lib64/libjemalloc_arrow.a" cp -rf ./brotli_ep/src/brotli_ep-install/lib/libbrotlienc-static.a "${TP_INSTALL_DIR}/lib64/libbrotlienc.a" cp -rf ./brotli_ep/src/brotli_ep-install/lib/libbrotlidec-static.a "${TP_INSTALL_DIR}/lib64/libbrotlidec.a" cp -rf ./brotli_ep/src/brotli_ep-install/lib/libbrotlicommon-static.a "${TP_INSTALL_DIR}/lib64/libbrotlicommon.a" strip_lib libarrow.a - strip_lib libjemalloc_arrow.a strip_lib libparquet.a }