Skip to content

Commit

Permalink
Support detection of bthread mutex and FastMutex deadlock caused by d…
Browse files Browse the repository at this point in the history
…ouble lock
  • Loading branch information
chenBright committed Sep 22, 2024
1 parent 8fa216c commit b01486b
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 21 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma
options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma --with-debug-mutex
- name: compile
run: |
make -j ${{env.proc_num}}
Expand All @@ -76,7 +76,7 @@ jobs:
export CC=gcc && export CXX=g++
mkdir build
cd build
cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON ..
cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_MUTEX=ON ..
- name: compile
run: |
cd build
Expand All @@ -86,7 +86,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- run: bazel test --verbose_failures --define with_mesalink=false --define with_glog=true --define with_thrift=true -- //... -//example/...
- run: bazel test --verbose_failures --define with_mesalink=false --define with_glog=true --define with_thrift=true --define with_debug_mutex=true -- //... -//example/...

clang-compile-with-make:
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma
options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma --with-debug-mutex
- name: compile
run: |
make -j ${{env.proc_num}}
Expand All @@ -150,7 +150,7 @@ jobs:
export CC=clang && export CXX=clang++
mkdir build
cd build
cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON ..
cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_MUTEX=ON ..
- name: compile
run: |
cd build
Expand All @@ -160,7 +160,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- run: bazel build --verbose_failures --action_env=CC=clang-12 --define with_mesalink=false --define with_glog=true --define with_thrift=true -- //... -//example/...
- run: bazel build --verbose_failures --action_env=CC=clang-12 --define with_mesalink=false --define with_glog=true --define with_thrift=true --define with_debug_mutex=true -- //... -//example/...

clang-unittest:
runs-on: ubuntu-20.04
Expand Down
5 changes: 4 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ COPTS = [
}) + select({
"//bazel/config:brpc_with_rdma": ["-DBRPC_WITH_RDMA=1"],
"//conditions:default": [""],
})
}) + select({
"//bazel/config:brpc_with_debug_mutex": ["-DBRPC_DEBUG_MUTEX=1"],
"//conditions:default": ["-DBRPC_DEBUG_MUTEX=0"],
})

LINKOPTS = [
"-pthread",
Expand Down
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ option(WITH_DEBUG_SYMBOLS "With debug symbols" ON)
option(WITH_THRIFT "With thrift framed protocol supported" OFF)
option(WITH_SNAPPY "With snappy" OFF)
option(WITH_RDMA "With RDMA" OFF)
option(WITH_DEBUG_MUTEX "With debugging mutex" OFF)
option(BUILD_UNIT_TESTS "Whether to build unit tests" OFF)
option(BUILD_FUZZ_TESTS "Whether to build fuzz tests" OFF)
option(BUILD_BRPC_TOOLS "Whether to build brpc tools" ON)
Expand Down Expand Up @@ -66,6 +67,11 @@ if(WITH_DEBUG_SYMBOLS)
set(DEBUG_SYMBOL "-g")
endif()

set(WITH_DEBUG_MUTEX_VAL "0")
if(WITH_DEBUG_MUTEX)
set(WITH_DEBUG_MUTEX_VAL "1")
endif()

if(WITH_THRIFT)
set(THRIFT_CPP_FLAG "-DENABLE_THRIFT_FRAMED_PROTOCOL")
find_library(THRIFT_LIB NAMES thrift)
Expand Down Expand Up @@ -117,7 +123,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override")
endif()

set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS}")
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS} -DBRPC_DEBUG_MUTEX=${WITH_DEBUG_MUTEX_VAL}")
if(WITH_MESALINK)
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DUSE_MESALINK")
endif()
Expand Down
6 changes: 6 additions & 0 deletions bazel/config/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,10 @@ config_setting(
name = "brpc_with_boringssl",
define_values = {"BRPC_WITH_BORINGSSL": "true"},
visibility = ["//visibility:public"],
)

config_setting(
name = "brpc_with_debug_mutex",
define_values = {"with_debug_mutex": "true"},
visibility = ["//visibility:public"],
)
6 changes: 4 additions & 2 deletions config_brpc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ else
LDD=ldd
fi

TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"`
TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols,with-debug-mutex -n 'config_brpc' -- "$@"`
WITH_GLOG=0
WITH_THRIFT=0
WITH_RDMA=0
WITH_MESALINK=0
DEBUGSYMBOLS=-g
BRPC_DEBUG_MUTEX=0

if [ $? != 0 ] ; then >&2 $ECHO "Terminating..."; exit 1 ; fi

Expand All @@ -68,6 +69,7 @@ while true; do
--with-rdma) WITH_RDMA=1; shift 1 ;;
--with-mesalink) WITH_MESALINK=1; shift 1 ;;
--nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;;
--with-debug-mutex ) BRPC_DEBUG_MUTEX=1; shift 1 ;;
-- ) shift; break ;;
* ) break ;;
esac
Expand Down Expand Up @@ -405,7 +407,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS"
append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS"

# CPP means C PreProcessing, not C PlusPlus
CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS"
CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS -DBRPC_DEBUG_MUTEX=$BRPC_DEBUG_MUTEX"

# Avoid over-optimizations of TLS variables by GCC>=4.8
# See: https://github.com/apache/brpc/issues/1693
Expand Down
105 changes: 96 additions & 9 deletions src/bthread/mutex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,10 +777,58 @@ const MutexInternal MUTEX_LOCKED_RAW = {{1},{0},0};
BAIDU_CASSERT(sizeof(unsigned) == sizeof(MutexInternal),
sizeof_mutex_internal_must_equal_unsigned);

#if BRPC_DEBUG_MUTEX
#define BTHREAD_MUTEX_RESET_OWNER \
((butil::atomic<mutex_owner_type>*)&m->owner.type) \
->store(OWNER_TYPE_NONE, butil::memory_order_relaxed)

#define BTHREAD_MUTEX_SET_OWNER \
do { \
TaskGroup* task_group = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); \
if (NULL != task_group && !task_group->is_current_main_task()) { \
m->owner.id = bthread_self(); \
((butil::atomic<mutex_owner_type>*)&m->owner.type) \
->store(OWNER_TYPE_BTHREAD, butil::memory_order_release); \
} else { \
m->owner.id = pthread_numeric_id(); \
((butil::atomic<mutex_owner_type>*)&m->owner.type) \
->store(OWNER_TYPE_PTHREAD, butil::memory_order_release); \
} \
} while(false)

// Check if the mutex has been locked by the current thread.
// Double lock on the same thread will cause deadlock.
#define BTHREAD_MUTEX_CHECK_OWNER \
mutex_owner_type owner_type = ((butil::atomic<mutex_owner_type>*)&m->owner.type) \
->load(butil::memory_order_acquire); \
bool double_lock = \
(owner_type == OWNER_TYPE_BTHREAD && m->owner.id == bthread_self()) || \
(owner_type == OWNER_TYPE_PTHREAD && m->owner.id == pthread_numeric_id()); \
if (double_lock) { \
butil::debug::StackTrace trace(true); \
LOG(ERROR) << "Detected deadlock caused by double lock of bthread_mutex_t:" \
<< std::endl << trace.ToString(); \
}
#else
#define BTHREAD_MUTEX_RESET_OWNER ((void)0)
#define BTHREAD_MUTEX_SET_OWNER ((void)0)
#define BTHREAD_MUTEX_CHECK_OWNER ((void)0)
#endif // BRPC_DEBUG_MUTEX

inline int mutex_trylock_impl(bthread_mutex_t* m) {
MutexInternal* split = (MutexInternal*)m->butex;
if (!split->locked.exchange(1, butil::memory_order_acquire)) {
BTHREAD_MUTEX_SET_OWNER;
return 0;
}
return EBUSY;
}

const int MAX_SPIN_ITER = 4;

inline int mutex_lock_contended_impl(
bthread_mutex_t* m, const struct timespec* __restrict abstime) {
inline int mutex_lock_contended_impl(bthread_mutex_t* __restrict m,
const struct timespec* __restrict abstime) {
BTHREAD_MUTEX_CHECK_OWNER;
// When a bthread first contends for a lock, active spinning makes sense.
// Spin only few times and only if local `rq' is empty.
TaskGroup* g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
Expand Down Expand Up @@ -813,12 +861,43 @@ inline int mutex_lock_contended_impl(
queue_lifo = true;
}
}
BTHREAD_MUTEX_SET_OWNER;
return 0;
}

#ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX
namespace internal {

#if BRPC_DEBUG_MUTEX
#define FAST_PTHREAD_MUTEX_RESET_OWNER \
((butil::atomic<mutex_owner_type>*)&_owner.type) \
->store(OWNER_TYPE_NONE, butil::memory_order_relaxed)

#define FAST_PTHREAD_MUTEX_SET_OWNER \
_owner.id = pthread_numeric_id(); \
((butil::atomic<mutex_owner_type>*)&_owner.type) \
->store(OWNER_TYPE_PTHREAD,butil::memory_order_release)

// Check if the mutex has been locked by the current thread.
// Double lock on the same thread will cause deadlock.
#define FAST_PTHREAD_MUTEX_CHECK_OWNER \
mutex_owner_type owner_type = ((butil::atomic<mutex_owner_type>*)&_owner.type) \
->load(butil::memory_order_acquire); \
if (owner_type == OWNER_TYPE_PTHREAD && _owner.id == pthread_numeric_id()) { \
butil::debug::StackTrace trace(true); \
LOG(ERROR) << "Detected deadlock caused by double lock of FastPthreadMutex:" \
<< std::endl << trace.ToString(); \
}
#else
#define FAST_PTHREAD_MUTEX_RESET_OWNER ((void)0)
#define FAST_PTHREAD_MUTEX_SET_OWNER ((void)0)
#define FAST_PTHREAD_MUTEX_CHECK_OWNER ((void)0)
#endif // BRPC_DEBUG_MUTEX

FastPthreadMutex::FastPthreadMutex() : _futex(0) {
FAST_PTHREAD_MUTEX_RESET_OWNER;
}

int FastPthreadMutex::lock_contended() {
butil::atomic<unsigned>* whole = (butil::atomic<unsigned>*)&_futex;
while (whole->exchange(BTHREAD_MUTEX_CONTENDED) & BTHREAD_MUTEX_LOCKED) {
Expand All @@ -831,23 +910,27 @@ int FastPthreadMutex::lock_contended() {
}

void FastPthreadMutex::lock() {
FAST_PTHREAD_MUTEX_CHECK_OWNER;
auto split = (bthread::MutexInternal*)&_futex;
if (split->locked.exchange(1, butil::memory_order_acquire)) {
(void)lock_contended();
}
FAST_PTHREAD_MUTEX_SET_OWNER;
++tls_pthread_lock_count;
}

bool FastPthreadMutex::try_lock() {
auto split = (bthread::MutexInternal*)&_futex;
bool lock = !split->locked.exchange(1, butil::memory_order_acquire);
if (lock) {
FAST_PTHREAD_MUTEX_SET_OWNER;
++tls_pthread_lock_count;
}
return lock;
}

void FastPthreadMutex::unlock() {
FAST_PTHREAD_MUTEX_RESET_OWNER;
auto whole = (butil::atomic<unsigned>*)&_futex;
const unsigned prev = whole->exchange(0, butil::memory_order_release);
// CAUTION: the mutex may be destroyed, check comments before butex_create
Expand All @@ -857,6 +940,10 @@ void FastPthreadMutex::unlock() {
--tls_pthread_lock_count;
}

#undef FAST_PTHREAD_MUTEX_RESET_OWNER
#undef FAST_PTHREAD_MUTEX_SET_OWNER
#undef FAST_PTHREAD_MUTEX_CHECK_OWNER

} // namespace internal
#endif // BTHREAD_USE_FAST_PTHREAD_MUTEX

Expand All @@ -875,6 +962,7 @@ extern "C" {
int bthread_mutex_init(bthread_mutex_t* __restrict m,
const bthread_mutexattr_t* __restrict) {
bthread::make_contention_site_invalid(&m->csite);
BTHREAD_MUTEX_RESET_OWNER;
m->butex = bthread::butex_create_checked<unsigned>();
if (!m->butex) {
return ENOMEM;
Expand All @@ -889,20 +977,15 @@ int bthread_mutex_destroy(bthread_mutex_t* m) {
}

int bthread_mutex_trylock(bthread_mutex_t* m) {
bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
if (!split->locked.exchange(1, butil::memory_order_acquire)) {
return 0;
}
return EBUSY;
return bthread::mutex_trylock_impl(m);
}

int bthread_mutex_lock_contended(bthread_mutex_t* m) {
return bthread::mutex_lock_contended_impl(m, NULL);
}

int bthread_mutex_lock(bthread_mutex_t* m) {
bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
if (!split->locked.exchange(1, butil::memory_order_acquire)) {
if (0 == bthread::mutex_trylock_impl(m)) {
return 0;
}
// Don't sample when contention profiler is off.
Expand Down Expand Up @@ -965,6 +1048,7 @@ int bthread_mutex_unlock(bthread_mutex_t* m) {
saved_csite = m->csite;
bthread::make_contention_site_invalid(&m->csite);
}
BTHREAD_MUTEX_RESET_OWNER;
const unsigned prev = whole->exchange(0, butil::memory_order_release);
// CAUTION: the mutex may be destroyed, check comments before butex_create
if (prev == BTHREAD_MUTEX_LOCKED) {
Expand All @@ -982,6 +1066,9 @@ int bthread_mutex_unlock(bthread_mutex_t* m) {
bthread::submit_contention(saved_csite, unlock_end_ns);
return 0;
}
#undef BTHREAD_MUTEX_RESET_OWNER
#undef BTHREAD_MUTEX_SET_OWNER
#undef BTHREAD_MUTEX_CHECK_OWNER

#ifndef NO_PTHREAD_MUTEX_HOOK
int pthread_mutex_lock(pthread_mutex_t* __mutex) {
Expand Down
8 changes: 6 additions & 2 deletions src/bthread/mutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern int bthread_mutex_lock(bthread_mutex_t* mutex);
extern int bthread_mutex_timedlock(bthread_mutex_t* __restrict mutex,
const struct timespec* __restrict abstime);
extern int bthread_mutex_unlock(bthread_mutex_t* mutex);
extern bthread_t bthread_self(void);
__END_DECLS

namespace bthread {
Expand Down Expand Up @@ -71,14 +72,17 @@ namespace internal {
#ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX
class FastPthreadMutex {
public:
FastPthreadMutex() : _futex(0) {}
~FastPthreadMutex() = default;
FastPthreadMutex();
void lock();
void unlock();
bool try_lock();
private:
DISALLOW_COPY_AND_ASSIGN(FastPthreadMutex);
int lock_contended();
// Note: Owner detection of the mutex comes with average execution
// slowdown of about 50%., so it is only used for debugging and is
// only available when the `BRPC_DEBUG_MUTEX' macro is defined.
mutex_owner_t _owner;
unsigned _futex;
};
#else
Expand Down
15 changes: 15 additions & 0 deletions src/bthread/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,28 @@ typedef struct {
size_t sampling_range;
} bthread_contention_site_t;

enum mutex_owner_type {
OWNER_TYPE_NONE = 0,
OWNER_TYPE_BTHREAD,
OWNER_TYPE_PTHREAD
};

struct mutex_owner_t {
mutex_owner_type type;
uint64_t id;
};

typedef struct bthread_mutex_t {
#if defined(__cplusplus)
bthread_mutex_t() : butex(NULL), csite{} {}
DISALLOW_COPY_AND_ASSIGN(bthread_mutex_t);
#endif
unsigned* butex;
bthread_contention_site_t csite;
// Note: Owner detection of the mutex comes with average execution
// slowdown of about 50%., so it is only used for debugging and is
// only available when the `BRPC_DEBUG_MUTEX' macro is defined.
mutex_owner_t owner;
} bthread_mutex_t;

typedef struct {
Expand Down
Loading

0 comments on commit b01486b

Please sign in to comment.